# qwen_agent/skills/kfs-answer/scripts/merge_citations.py
# 2026-04-17 17:02:11 +09:00
#
# 57 lines
# 1.8 KiB
# Python

"""Merge accumulated citation data into final CITATION tags.
Usage: python3 merge_citations.py
Reads {session_dir}/citations.jsonl (appended by query.py / query_db.py),
merges rows by (file, sheet), outputs one <CITATION .../> tag per combination.
Agent calls this ONCE before composing the final answer (Step 4).
"""
import json
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from _session import get_session_dir
def _typed_sort_key(value):
    """Return a sort key that stays comparable across mixed value types.

    Scalars are keyed as ``(type name, value)`` so values are only ever
    compared with others of the same type; tuples are keyed element-wise.
    """
    if isinstance(value, tuple):
        return tuple(_typed_sort_key(part) for part in value)
    return (type(value).__name__, value)


def _tolerant_sorted(values):
    """Sort *values* naturally, falling back to a type-grouped order.

    The natural order is tried first so homogeneous data is ordered exactly
    as plain ``sorted()`` would order it. The fallback only kicks in when the
    values are not mutually comparable (e.g. sheet ``1`` and sheet ``"1"``).
    """
    try:
        return sorted(values)
    except TypeError:
        return sorted(values, key=_typed_sort_key)


def _load_groups(citations_path):
    """Read the JSONL file and merge its entries by ``(file, sheet)``.

    Returns a dict mapping ``(file, sheet)`` to
    ``{"filename": str, "rows": set}``. Blank lines are ignored. Lines that
    are not valid JSON, are not objects, or lack ``file``/``sheet`` are
    skipped with a warning on stderr instead of aborting the whole merge.
    """
    groups = {}
    with open(citations_path, "r", encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
                key = (entry["file"], entry["sheet"])
                # "rows" may be absent or null for sheet-level citations.
                rows = set(entry.get("rows") or ())
                filename = entry.get("filename") or ""
                group = groups.setdefault(key, {"filename": "", "rows": set()})
            except (ValueError, KeyError, TypeError) as exc:
                # json.JSONDecodeError is a ValueError; KeyError/TypeError
                # cover missing fields, non-object entries and unhashable
                # values. One bad line must not discard the good ones.
                print(
                    f"[WARN] citations.jsonl line {lineno} skipped: {exc!r}",
                    file=sys.stderr,
                )
                continue
            group["rows"] |= rows
            # Keep the first non-empty filename seen for this group, so an
            # early entry without a filename does not hide a later one.
            if not group["filename"]:
                group["filename"] = filename
    return groups


def _format_citation(file_id, sheet_num, info):
    """Build one ``<CITATION .../>`` tag for a merged ``(file, sheet)`` group."""
    # NOTE(review): attribute values are interpolated without escaping, so a
    # filename containing '"' yields a malformed tag. Escaping is not added
    # here because the consumer's parsing rules are not visible - confirm.
    fn_attr = f' filename="{info["filename"]}"' if info["filename"] else ""
    rows = _tolerant_sorted(info["rows"])
    if rows:
        rows_str = "[" + ", ".join(str(r) for r in rows) + "]"
        return (
            f'<CITATION file="{file_id}"{fn_attr} sheet="{sheet_num}" '
            f'rows="{rows_str}" />'
        )
    # Sheet-level citation (markdown, no rows)
    return f'<CITATION file="{file_id}"{fn_attr} sheet="{sheet_num}" />'


def main():
    """Merge the session's accumulated citations and print the final tags.

    Reads ``citations.jsonl`` from the directory returned by
    ``get_session_dir()``. Prints ``[NO CITATIONS]`` when the file is missing
    or yields no usable entries; otherwise prints ``[CITATIONS]`` followed by
    one ``<CITATION .../>`` tag per ``(file, sheet)`` pair in sorted order,
    with the row numbers of all entries for that pair merged and sorted.
    """
    citations_path = os.path.join(get_session_dir(), "citations.jsonl")
    if not os.path.isfile(citations_path):
        print("[NO CITATIONS]")
        return

    groups = _load_groups(citations_path)
    if not groups:
        print("[NO CITATIONS]")
        return

    print("[CITATIONS]")
    for key in _tolerant_sorted(groups):
        file_id, sheet_num = key
        print(_format_citation(file_id, sheet_num, groups[key]))


if __name__ == "__main__":
    main()