diff --git a/mcp/rag_retrieve_server.py b/mcp/rag_retrieve_server.py
index e44d50d..80a659f 100644
--- a/mcp/rag_retrieve_server.py
+++ b/mcp/rag_retrieve_server.py
@@ -29,6 +29,49 @@ from mcp_common import (
BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
MASTERKEY = os.getenv("MASTERKEY", "master")
+# Citation instruction prefixes injected into tool results
+DOCUMENT_CITATION_INSTRUCTIONS = """
+When using the retrieved knowledge below, you MUST add XML citation tags for factual claims.
+
+## Document Knowledge
+Format: ``
+- Use `file` attribute with the UUID from document markers
+- Use `filename` attribute with the actual filename from document markers
+- Use `page` attribute (singular) with the page number
+- `page` MUST be 0-based and must match the `pages:` values shown in the learned knowledge context
+
+## Web Page Knowledge
+Format: ``
+- Use `url` attribute with the web page URL from the source metadata
+- Do not use `file`, `filename`, or `page` attributes for web sources
+- If content is grounded in a web source, prefer a web citation with `url` over a file citation
+
+## Placement Rules
+- Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
+- NEVER collect all citations and place them at the end of your response
+- Limit to 1-2 citations per paragraph/bullet list
+- If your answer uses learned knowledge, you MUST generate at least 1 citation tag in the response
+
+
+"""
+
+TABLE_CITATION_INSTRUCTIONS = """
+When using the retrieved table knowledge below, you MUST add XML citation tags for factual claims.
+
+Format: ``
+- Parse `__src`: `F1S2R5` = file_ref F1, sheet 2, row 5
+- Look up file_id in `file_ref_table`
+- Combine same-sheet rows into one citation: `rows=[2, 4, 6]`
+- MANDATORY: Create SEPARATE citation for EACH (file, sheet) combination
+- NEVER put a citation on the same line as a bullet point or table row
+- Citations MUST be on separate lines AFTER the complete list/table
+- NEVER include the `__src` column in your response - it is internal metadata only
+- Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
+- NEVER collect all citations and place them at the end of your response
+
+
+"""
+
def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
"""调用RAG检索API"""
try:
@@ -94,7 +137,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
"content": [
{
"type": "text",
- "text": markdown_content
+ "text": DOCUMENT_CITATION_INSTRUCTIONS + markdown_content
}
]
}
@@ -107,7 +150,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
}
]
}
-
+
except requests.exceptions.RequestException as e:
return {
"content": [
@@ -179,7 +222,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
"content": [
{
"type": "text",
- "text": markdown_content
+ "text": TABLE_CITATION_INSTRUCTIONS + markdown_content
}
]
}
diff --git a/prompt/system_prompt.md b/prompt/system_prompt.md
index 8162c7f..c978779 100644
--- a/prompt/system_prompt.md
+++ b/prompt/system_prompt.md
@@ -2,83 +2,13 @@
## CITATION REQUIREMENTS
-### A. Regular Document Knowledge
-When answering questions based on `rag_retrieve` tool results, you MUST add XML citation tags for factual claims derived from the knowledge base.
+When your answer uses learned knowledge, you MUST generate XML citation tags. Follow the specific citation format instructions returned by each tool (`rag_retrieve`, `table_rag_retrieve`).
-**Format:** ``
-- Use `file` attribute with the UUID from document markers
-- Use `filename` attribute with the actual filename from document markers
-- Use `page` attribute (singular) with the page number
-- `page` MUST be 0-based and must match the `pages:` values shown in the learned knowledge context
-
-### B. Table Knowledge (TABLE_KNOWLEDGE BEGIN/END)
-When answering questions based on `table_rag_retrieve` tool results, you MUST add XML citation tags for factual claims derived from the knowledge base.
-
-**!!! CRITICAL RULE: NEVER put on same line as bullet/row !!!**
-**Citations MUST be on separate lines AFTER the complete list/table.**
-**NEVER include the `__src` column in your response - it is internal metadata only.**
-
-Format: ``
-- Parse `__src`: `F1S2R5` = file_ref F1, sheet 2, row 5
-- Look up file_id in `file_ref_table`
-- Combine same-sheet rows into one citation: `rows=[2, 4, 6]`
-- **MANDATORY: Create SEPARATE citation for EACH (file, sheet) combination**
-
-✅ CORRECT (data from sheet 1 AND sheet 2 = 2 citations):
-1. Liam - male
-2. Noah - male
-3. Ethan - male
-4. Mason - male
-5. William - male
-
-
-
-❌ WRONG (citation on same line):
-1. Liam - male
-❌ WRONG (missing sheet 2 citation):
-...only 1 citation when data comes from 2 sheets...
-
-
-### C. Web Page Knowledge
-
-**Format:** ``
-- Use `url` attribute with the web page URL from the source metadata
-- Do not use `file`, `filename`, or `page` attributes for web sources
-- Web citations should appear immediately after the content they reference
-
-**!!! CRITICAL PLACEMENT RULES !!!**
-1. **Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list** that uses the knowledge
-2. **NEVER collect all citations and place them at the end of your response**
-3. **Limit to 1-2 citations per paragraph/bullet list** - combine related facts under one citation
-4. **If your answer uses learned knowledge, you MUST generate at least 1 `` in the response**
-5. **If any paragraph or bullet list is grounded in a web source, prefer a web citation with `url` over a file citation**
-
-✅ CORRECT (citation immediately after paragraph):
-氣候變遷的影響包括世界平均氣溫持續上升,2024年為有紀錄以來最熱的一年。
-
-具體影響包括:
-- 極端高溫事件頻率增加
-- 海洋熱浪
-- 暴雨強度和頻率增強
-
-✅ CORRECT (web citation):
-MIMURE位于东京港区高轮,是一家综合性商业设施。
-
-❌ WRONG (all citations at the end):
-氣候變遷的影響包括...(long response)...
-
-
-
-
-(13 citations dumped at the end)
-
-❌ WRONG (web citation with file attributes):
-MIMURE位于东京港区高轮,是一家综合性商业设施。
-
-❌ WRONG (too many citations for short content):
-2024年全球氣溫上升。
-世界各地發生災害。
-沙烏地阿拉伯熱浪。
+### General Placement Rules
+1. Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
+2. NEVER collect all citations and place them at the end of your response
+3. Limit to 1-2 citations per paragraph/bullet list - combine related facts under one citation
+4. If your answer uses learned knowledge, you MUST generate at least 1 citation tag in the response
### Current Working Directory