Merge branch 'onprem-release' into dev

This commit is contained in:
朱潮 2026-03-30 21:21:37 +08:00
commit 52a700e0db
2 changed files with 57 additions and 3 deletions

View File

@ -29,6 +29,49 @@ from mcp_common import (
BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
MASTERKEY = os.getenv("MASTERKEY", "master")
# Citation instruction prefixes injected into tool results
DOCUMENT_CITATION_INSTRUCTIONS = """<CITATION_INSTRUCTIONS>
When using the retrieved knowledge below, you MUST add XML citation tags for factual claims.
## Document Knowledge
Format: `<CITATION file="file_uuid" filename="name.pdf" page=3 />`
- Use `file` attribute with the UUID from document markers
- Use `filename` attribute with the actual filename from document markers
- Use `page` attribute (singular) with the page number
- `page` MUST be 0-based and must match the `pages:` values shown in the learned knowledge context
## Web Page Knowledge
Format: `<CITATION url="https://example.com/page" />`
- Use `url` attribute with the web page URL from the source metadata
- Do not use `file`, `filename`, or `page` attributes for web sources
- If content is grounded in a web source, prefer a web citation with `url` over a file citation
## Placement Rules
- Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
- NEVER collect all citations and place them at the end of your response
- Limit to 1-2 citations per paragraph/bullet list
- If your answer uses learned knowledge, you MUST generate at least 1 `<CITATION ... />` in the response
</CITATION_INSTRUCTIONS>
"""
TABLE_CITATION_INSTRUCTIONS = """<CITATION_INSTRUCTIONS>
When using the retrieved table knowledge below, you MUST add XML citation tags for factual claims.
Format: `<CITATION file="file_id" filename="name.xlsx" sheet=1 rows=[2, 4] />`
- Parse `__src`: `F1S2R5` = file_ref F1, sheet 2, row 5
- Look up file_id in `file_ref_table`
- Combine same-sheet rows into one citation: `rows=[2, 4, 6]`
- MANDATORY: Create SEPARATE citation for EACH (file, sheet) combination
- NEVER put <CITATION> on the same line as a bullet point or table row
- Citations MUST be on separate lines AFTER the complete list/table
- NEVER include the `__src` column in your response - it is internal metadata only
- Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
- NEVER collect all citations and place them at the end of your response
</CITATION_INSTRUCTIONS>
"""
def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
"""调用RAG检索API"""
try:
@ -94,7 +137,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
"content": [
{
"type": "text",
"text": markdown_content
"text": DOCUMENT_CITATION_INSTRUCTIONS + markdown_content
}
]
}
@ -107,7 +150,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
}
]
}
except requests.exceptions.RequestException as e:
return {
"content": [
@ -179,7 +222,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
"content": [
{
"type": "text",
"text": markdown_content
"text": TABLE_CITATION_INSTRUCTIONS + markdown_content
}
]
}

View File

@ -1,5 +1,16 @@
{extra_prompt}
## CITATION REQUIREMENTS
When your answer uses learned knowledge, you MUST generate `<CITATION ... />` tags. Follow the specific citation format instructions returned by each tool (`rag_retrieve`, `table_rag_retrieve`).
### General Placement Rules
1. Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
2. NEVER collect all citations and place them at the end of your response
3. Limit to 1-2 citations per paragraph/bullet list - combine related facts under one citation
4. If your answer uses learned knowledge, you MUST generate at least 1 `<CITATION ... />` in the response
### Current Working Directory
PROJECT_ROOT: `{agent_dir_path}`