commit 5f6e806b18 (parent 4e4052874a)
Author: 朱潮
Date:   2026-02-10 21:41:16 +08:00
3 changed files with 17 additions and 26 deletions

View File

@@ -3,7 +3,7 @@
   "mcpServers": {
     "rag_retrieve": {
       "transport": "http",
-      "url": "http://100.77.70.35:9382/mcp/",
+      "url": "http://host.docker.internal:9382/mcp/",
       "headers": {
         "api_key": "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8",
         "X-Dataset-Ids": "{dataset_ids}"

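Note: switching the MCP endpoint from the LAN address 100.77.70.35 to host.docker.internal makes the config usable from a client running inside a container, since that hostname resolves to the Docker host. On Docker Desktop it resolves out of the box; on a plain Linux Docker Engine the container usually needs an extra_hosts entry such as "host.docker.internal:host-gateway". A minimal, hypothetical check from inside the client container:

    import socket

    # Resolves only if the container can see the Docker host under this name;
    # a failure here usually means the host-gateway mapping is missing.
    print(socket.gethostbyname("host.docker.internal"))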
View File

@@ -206,35 +206,26 @@ class RAGFlowConnector:
         res = res.json()
         if res.get("code") == 0:
             data = res["data"]
-            chunks = []
             # Cache document metadata and dataset information
             document_cache, dataset_cache = await self._get_document_metadata_cache(dataset_ids, api_key=api_key, force_refresh=force_refresh)
-            # Process chunks with enhanced field mapping including per-chunk metadata
+            # Build markdown response with only required fields
+            markdown_lines = []
             for chunk_data in data.get("chunks", []):
                 enhanced_chunk = self._map_chunk_fields(chunk_data, dataset_cache, document_cache)
-                chunks.append(enhanced_chunk)
-
-            # Build structured response (no longer need response-level document_metadata)
-            response = {
-                "chunks": chunks,
-                "pagination": {
-                    "page": data.get("page", page),
-                    "page_size": data.get("page_size", page_size),
-                    "total_chunks": data.get("total", len(chunks)),
-                    "total_pages": (data.get("total", len(chunks)) + page_size - 1) // page_size,
-                },
-                "query_info": {
-                    "question": question,
-                    "similarity_threshold": similarity_threshold,
-                    "vector_weight": vector_similarity_weight,
-                    "keyword_search": keyword,
-                    "dataset_count": len(dataset_ids),
-                },
-            }
-            return [types.TextContent(type="text", text=json.dumps(response, ensure_ascii=False))]
+                document_name = enhanced_chunk.get("document_name", enhanced_chunk.get("document_keyword", ""))
+                content = enhanced_chunk.get("content", "")
+                document_id = enhanced_chunk.get("document_id", "")
+                markdown_lines.append(f"**document_id**: {document_id}")
+                markdown_lines.append(f"{document_name}:")
+                markdown_lines.append(f"{content}")
+                markdown_lines.append("---")
+
+            markdown_output = "\n".join(markdown_lines)
+            return [types.TextContent(type="text", text=markdown_output)]
         raise Exception([types.TextContent(type="text", text=res.get("message"))])
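
For reference, a short sketch of the text payload the rewritten loop now emits. The chunk values below are invented; only the field names (document_id, document_name/document_keyword, content) come from the code above:

    # Hypothetical chunk as returned by _map_chunk_fields(); values are made up.
    enhanced_chunk = {
        "document_id": "d0c1d2",
        "document_name": "deployment_guide.pdf",
        "content": "Set RAGFLOW_API_URL before starting the service.",
    }

    markdown_lines = []
    document_name = enhanced_chunk.get("document_name", enhanced_chunk.get("document_keyword", ""))
    markdown_lines.append(f"**document_id**: {enhanced_chunk.get('document_id', '')}")
    markdown_lines.append(f"{document_name}:")
    markdown_lines.append(f"{enhanced_chunk.get('content', '')}")
    markdown_lines.append("---")
    print("\n".join(markdown_lines))
    # **document_id**: d0c1d2
    # deployment_guide.pdf:
    # Set RAGFLOW_API_URL before starting the service.
    # ---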
@@ -518,12 +509,12 @@ def with_api_key(required: bool = True):
 @app.list_tools()
 @with_api_key(required=True)
-async def list_tools(*, connector: RAGFlowConnector, api_key: str) -> list[types.Tool]:
+async def list_tools(*, connector: RAGFlowConnector, api_key: str, request: Any = None) -> list[types.Tool]:
     dataset_description = await connector.list_datasets(api_key=api_key)
     return [
         types.Tool(
-            name="ragflow_retrieval",
+            name="rag_retrieve",
             description="Retrieve relevant chunks from the RAGFlow retrieve interface based on the question. You can optionally specify dataset_ids to search only specific datasets, or omit dataset_ids entirely to search across ALL available datasets. You can also optionally specify document_ids to search within specific documents. When dataset_ids is not provided or is empty, the system will automatically search across all available datasets. Below is the list of all available datasets, including their descriptions and IDs:"
             + dataset_description,
             inputSchema={
@@ -597,7 +588,7 @@ async def call_tool(
     api_key: str,
     request: Any = None,
 ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-    if name == "ragflow_retrieval":
+    if name == "rag_retrieve":
         document_ids = arguments.get("document_ids", [])
         dataset_ids = arguments.get("dataset_ids", [])
         question = arguments.get("question", "")

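The tool is now registered and dispatched as rag_retrieve, so a client that still sends ragflow_retrieval will no longer match the dispatch branch above. A hedged sketch of a call payload, using only the argument keys that call_tool() reads (the question text and ID lists are hypothetical):

    # Hypothetical call payload; keys mirror arguments.get(...) in call_tool().
    example_call = {
        "name": "rag_retrieve",  # formerly "ragflow_retrieval"
        "arguments": {
            "question": "How is the HTTP transport configured?",
            "dataset_ids": [],    # empty or omitted -> search all available datasets
            "document_ids": [],   # optional: restrict the search to specific documents
        },
    }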
View File

@@ -87,7 +87,7 @@ os.environ["OPENAI_API_KEY"] = "your_api_key"
 # ============================================================
 # RAGFlow API configuration
-RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://100.77.70.35:1080")
+RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://host.docker.internal:1080")
 RAGFLOW_API_KEY = os.getenv("RAGFLOW_API_KEY", "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8")

 # File upload configuration
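
Since both settings are read with os.getenv, the in-file defaults only apply when the environment variables are unset; exporting RAGFLOW_API_URL (or RAGFLOW_API_KEY) at startup overrides them without editing this file. A minimal sketch, with a hypothetical alternate URL:

    import os

    # The second argument to os.getenv is only a fallback; the environment wins.
    os.environ["RAGFLOW_API_URL"] = "http://10.0.0.5:1080"  # hypothetical override
    RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://host.docker.internal:1080")
    print(RAGFLOW_API_URL)  # -> http://10.0.0.5:1080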