"""Merge accumulated citation data into final CITATION tags. Usage: python3 merge_citations.py Reads {session_dir}/citations.jsonl (appended by query.py / query_db.py), merges rows by (file, sheet), outputs one tag per combination. Agent calls this ONCE before composing the final answer (Step 4). """ import json import os import sys sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from _session import get_session_dir def main(): citations_path = os.path.join(get_session_dir(), "citations.jsonl") if not os.path.isfile(citations_path): print("[NO CITATIONS]") return # Read all citation entries groups = {} # (file, sheet) -> {"filename": str, "rows": set} with open(citations_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue entry = json.loads(line) key = (entry["file"], entry["sheet"]) if key not in groups: groups[key] = {"filename": entry.get("filename", ""), "rows": set()} for r in entry.get("rows", []): groups[key]["rows"].add(r) if not groups: print("[NO CITATIONS]") return print("[CITATIONS]") for (file_id, sheet_num) in sorted(groups.keys()): info = groups[(file_id, sheet_num)] fn_attr = f' filename="{info["filename"]}"' if info["filename"] else "" rows = sorted(info["rows"]) if rows: rows_str = "[" + ", ".join(str(r) for r in rows) + "]" print(f'') else: # Sheet-level citation (markdown, no rows) print(f'') if __name__ == "__main__": main()