57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
"""Merge accumulated citation data into final CITATION tags.
|
|
|
|
Usage: python3 merge_citations.py
|
|
|
|
Reads {session_dir}/citations.jsonl (appended by query.py / query_db.py),
|
|
merges rows by (file, sheet), outputs one <CITATION .../> tag per combination.
|
|
|
|
Agent calls this ONCE before composing the final answer (Step 4).
|
|
"""
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
from _session import get_session_dir
|
|
|
|
|
|
def _sort_key(value):
    """Return a sort key that tolerates mixed JSON value types.

    Values decoded from JSON may mix numbers and strings, and comparing
    those directly raises TypeError. Numbers sort numerically first; every
    other value sorts after them by its string form. Homogeneous inputs
    keep the same order plain ``sorted()`` would give.
    """
    if isinstance(value, (int, float)):
        return (0, value, "")
    return (1, 0, str(value))


def _load_groups(citations_path):
    """Read a citations JSONL file and merge its entries by (file, sheet).

    Args:
        citations_path: Path to the JSONL file; one JSON object per line
            with "file" and "sheet" keys and optional "filename" / "rows".

    Returns:
        Dict mapping (file, sheet) -> {"filename": str, "rows": set}.

    Blank lines are ignored. Lines that are not valid JSON, are not JSON
    objects, or lack "file"/"sheet" are skipped with a warning on stderr so
    that one bad record does not discard every other citation.
    """
    groups = {}
    with open(citations_path, "r", encoding="utf-8") as f:
        for line_no, raw in enumerate(f, start=1):
            text = raw.strip()
            if not text:
                continue
            try:
                entry = json.loads(text)
                key = (entry["file"], entry["sheet"])
            except (ValueError, KeyError, TypeError) as exc:
                # ValueError covers json.JSONDecodeError; TypeError covers
                # records that are valid JSON but not objects.
                print(
                    f"[WARN] citations.jsonl line {line_no} skipped: {exc!r}",
                    file=sys.stderr,
                )
                continue

            group = groups.setdefault(key, {"filename": "", "rows": set()})
            # The first non-empty filename wins; later entries only fill a gap.
            if not group["filename"]:
                group["filename"] = entry.get("filename") or ""
            # "rows" may be absent or null for sheet-level citations.
            group["rows"].update(entry.get("rows") or [])
    return groups


def _format_tag(file_id, sheet_num, info):
    """Build the <CITATION .../> tag for one (file, sheet) group."""
    # NOTE(review): attribute values are interpolated verbatim; a double
    # quote inside a filename would break the tag. Confirm how the consumer
    # parses these tags before adding escaping.
    filename = info["filename"]
    fn_attr = f' filename="{filename}"' if filename else ""
    rows = sorted(info["rows"], key=_sort_key)
    if not rows:
        # Sheet-level citation (markdown, no rows)
        return f'<CITATION file="{file_id}"{fn_attr} sheet="{sheet_num}" />'
    rows_str = "[" + ", ".join(str(r) for r in rows) + "]"
    return f'<CITATION file="{file_id}"{fn_attr} sheet="{sheet_num}" rows="{rows_str}" />'


def main(session_dir=None):
    """Print merged citation tags for the session to stdout.

    Args:
        session_dir: Directory containing citations.jsonl. Defaults to the
            value returned by get_session_dir() when omitted.

    Prints "[NO CITATIONS]" when the file is missing or yields no usable
    entries; otherwise prints "[CITATIONS]" followed by one tag per
    (file, sheet) group in sorted order.
    """
    if session_dir is None:
        session_dir = get_session_dir()
    citations_path = os.path.join(session_dir, "citations.jsonl")
    if not os.path.isfile(citations_path):
        print("[NO CITATIONS]")
        return

    groups = _load_groups(citations_path)
    if not groups:
        print("[NO CITATIONS]")
        return

    print("[CITATIONS]")
    ordered = sorted(groups, key=lambda k: (_sort_key(k[0]), _sort_key(k[1])))
    for file_id, sheet_num in ordered:
        print(_format_tag(file_id, sheet_num, groups[(file_id, sheet_num)]))
|
|
|
|
|
|
# Script entry point: run the merge only when executed directly, not on import.
if __name__ == "__main__":
    main()
|