Merge branch 'onprem-release' into dev
This commit is contained in:
commit
52a700e0db
@ -29,6 +29,49 @@ from mcp_common import (
|
|||||||
BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
|
BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
|
||||||
MASTERKEY = os.getenv("MASTERKEY", "master")
|
MASTERKEY = os.getenv("MASTERKEY", "master")
|
||||||
|
|
||||||
|
# Citation instruction prefixes injected into tool results
|
||||||
|
DOCUMENT_CITATION_INSTRUCTIONS = """<CITATION_INSTRUCTIONS>
|
||||||
|
When using the retrieved knowledge below, you MUST add XML citation tags for factual claims.
|
||||||
|
|
||||||
|
## Document Knowledge
|
||||||
|
Format: `<CITATION file="file_uuid" filename="name.pdf" page=3 />`
|
||||||
|
- Use `file` attribute with the UUID from document markers
|
||||||
|
- Use `filename` attribute with the actual filename from document markers
|
||||||
|
- Use `page` attribute (singular) with the page number
|
||||||
|
- `page` MUST be 0-based and must match the `pages:` values shown in the learned knowledge context
|
||||||
|
|
||||||
|
## Web Page Knowledge
|
||||||
|
Format: `<CITATION url="https://example.com/page" />`
|
||||||
|
- Use `url` attribute with the web page URL from the source metadata
|
||||||
|
- Do not use `file`, `filename`, or `page` attributes for web sources
|
||||||
|
- If content is grounded in a web source, prefer a web citation with `url` over a file citation
|
||||||
|
|
||||||
|
## Placement Rules
|
||||||
|
- Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
|
||||||
|
- NEVER collect all citations and place them at the end of your response
|
||||||
|
- Limit to 1-2 citations per paragraph/bullet list
|
||||||
|
- If your answer uses learned knowledge, you MUST generate at least 1 `<CITATION ... />` in the response
|
||||||
|
</CITATION_INSTRUCTIONS>
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
TABLE_CITATION_INSTRUCTIONS = """<CITATION_INSTRUCTIONS>
|
||||||
|
When using the retrieved table knowledge below, you MUST add XML citation tags for factual claims.
|
||||||
|
|
||||||
|
Format: `<CITATION file="file_id" filename="name.xlsx" sheet=1 rows=[2, 4] />`
|
||||||
|
- Parse `__src`: `F1S2R5` = file_ref F1, sheet 2, row 5
|
||||||
|
- Look up file_id in `file_ref_table`
|
||||||
|
- Combine same-sheet rows into one citation: `rows=[2, 4, 6]`
|
||||||
|
- MANDATORY: Create a SEPARATE citation for EACH (file, sheet) combination
|
||||||
|
- NEVER put a `<CITATION ... />` tag on the same line as a bullet point or table row
|
||||||
|
- Citations MUST be on separate lines AFTER the complete list/table
|
||||||
|
- NEVER include the `__src` column in your response - it is internal metadata only
|
||||||
|
- Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
|
||||||
|
- NEVER collect all citations and place them at the end of your response
|
||||||
|
</CITATION_INSTRUCTIONS>
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
|
def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
|
||||||
"""调用RAG检索API"""
|
"""调用RAG检索API"""
|
||||||
try:
|
try:
|
||||||
@ -94,7 +137,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
|
|||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": markdown_content
|
"text": DOCUMENT_CITATION_INSTRUCTIONS + markdown_content
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -107,7 +150,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
return {
|
return {
|
||||||
"content": [
|
"content": [
|
||||||
@ -179,7 +222,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
|
|||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": markdown_content
|
"text": TABLE_CITATION_INSTRUCTIONS + markdown_content
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,16 @@
|
|||||||
{extra_prompt}
|
{extra_prompt}
|
||||||
|
|
||||||
|
## CITATION REQUIREMENTS
|
||||||
|
|
||||||
|
When your answer uses learned knowledge, you MUST generate `<CITATION ... />` tags. Follow the specific citation format instructions returned by each tool (`rag_retrieve`, `table_rag_retrieve`).
|
||||||
|
|
||||||
|
### General Placement Rules
|
||||||
|
1. Citations MUST appear IMMEDIATELY AFTER the paragraph or bullet list that uses the knowledge
|
||||||
|
2. NEVER collect all citations and place them at the end of your response
|
||||||
|
3. Limit to 1-2 citations per paragraph/bullet list - combine related facts under one citation
|
||||||
|
4. If your answer uses learned knowledge, you MUST generate at least 1 `<CITATION ... />` in the response
|
||||||
|
|
||||||
|
|
||||||
### Current Working Directory
|
### Current Working Directory
|
||||||
|
|
||||||
PROJECT_ROOT: `{agent_dir_path}`
|
PROJECT_ROOT: `{agent_dir_path}`
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user