朱潮 2026-02-10 21:41:16 +08:00
parent 4e4052874a
commit 5f6e806b18
3 changed files with 17 additions and 26 deletions

View File

@@ -3,7 +3,7 @@
   "mcpServers": {
     "rag_retrieve": {
       "transport": "http",
-      "url": "http://100.77.70.35:9382/mcp/",
+      "url": "http://host.docker.internal:9382/mcp/",
       "headers": {
         "api_key": "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8",
         "X-Dataset-Ids": "{dataset_ids}"

View File

@@ -206,35 +206,26 @@ class RAGFlowConnector:
         res = res.json()
         if res.get("code") == 0:
             data = res["data"]
-            chunks = []
             # Cache document metadata and dataset information
             document_cache, dataset_cache = await self._get_document_metadata_cache(dataset_ids, api_key=api_key, force_refresh=force_refresh)
-            # Process chunks with enhanced field mapping including per-chunk metadata
+            # Build markdown response with only required fields
+            markdown_lines = []
             for chunk_data in data.get("chunks", []):
                 enhanced_chunk = self._map_chunk_fields(chunk_data, dataset_cache, document_cache)
-                chunks.append(enhanced_chunk)
+                document_name = enhanced_chunk.get("document_name", enhanced_chunk.get("document_keyword", ""))
+                content = enhanced_chunk.get("content", "")
+                document_id = enhanced_chunk.get("document_id", "")
-            # Build structured response (no longer need response-level document_metadata)
-            response = {
-                "chunks": chunks,
-                "pagination": {
-                    "page": data.get("page", page),
-                    "page_size": data.get("page_size", page_size),
-                    "total_chunks": data.get("total", len(chunks)),
-                    "total_pages": (data.get("total", len(chunks)) + page_size - 1) // page_size,
-                },
-                "query_info": {
-                    "question": question,
-                    "similarity_threshold": similarity_threshold,
-                    "vector_weight": vector_similarity_weight,
-                    "keyword_search": keyword,
-                    "dataset_count": len(dataset_ids),
-                },
-            }
+                markdown_lines.append(f"**document_id**: {document_id}")
+                markdown_lines.append(f"{document_name}:")
+                markdown_lines.append(f"{content}")
+                markdown_lines.append("---")
-            return [types.TextContent(type="text", text=json.dumps(response, ensure_ascii=False))]
+            markdown_output = "\n".join(markdown_lines)
+            return [types.TextContent(type="text", text=markdown_output)]
         raise Exception([types.TextContent(type="text", text=res.get("message"))])
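
The retrieval result is no longer wrapped in a structured JSON object with pagination and query_info; each enhanced chunk is flattened into a few markdown lines (document_id, document name, content) separated by "---". A standalone sketch of that formatting, with invented sample chunks, looks like this:

def chunks_to_markdown(chunks):
    """Format enhanced chunks the same way the new retrieval code does."""
    markdown_lines = []
    for chunk in chunks:
        document_name = chunk.get("document_name", chunk.get("document_keyword", ""))
        content = chunk.get("content", "")
        document_id = chunk.get("document_id", "")
        markdown_lines.append(f"**document_id**: {document_id}")
        markdown_lines.append(f"{document_name}:")
        markdown_lines.append(f"{content}")
        markdown_lines.append("---")
    return "\n".join(markdown_lines)

# Sample data is made up; only the field names come from the diff.
print(chunks_to_markdown([
    {"document_id": "doc-1", "document_name": "handbook.pdf", "content": "First matching passage."},
    {"document_id": "doc-2", "document_keyword": "faq.md", "content": "Second matching passage."},
]))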
@@ -518,12 +509,12 @@ def with_api_key(required: bool = True):
 @app.list_tools()
 @with_api_key(required=True)
-async def list_tools(*, connector: RAGFlowConnector, api_key: str) -> list[types.Tool]:
+async def list_tools(*, connector: RAGFlowConnector, api_key: str, request: Any = None) -> list[types.Tool]:
     dataset_description = await connector.list_datasets(api_key=api_key)
     return [
         types.Tool(
-            name="ragflow_retrieval",
+            name="rag_retrieve",
             description="Retrieve relevant chunks from the RAGFlow retrieve interface based on the question. You can optionally specify dataset_ids to search only specific datasets, or omit dataset_ids entirely to search across ALL available datasets. You can also optionally specify document_ids to search within specific documents. When dataset_ids is not provided or is empty, the system will automatically search across all available datasets. Below is the list of all available datasets, including their descriptions and IDs:"
             + dataset_description,
             inputSchema={
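
The hunk is cut off at inputSchema=. Based on the parameters the description mentions and the ones call_tool reads in the next hunk (question, dataset_ids, document_ids), a plausible schema might look like the sketch below; the property descriptions and the required list are assumptions.

# Hypothetical inputSchema for the renamed rag_retrieve tool; the real schema
# is not visible in this hunk, so treat every detail here as an assumption.
input_schema = {
    "type": "object",
    "properties": {
        "question": {"type": "string", "description": "Question to retrieve relevant chunks for."},
        "dataset_ids": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Optional; omit or leave empty to search all available datasets.",
        },
        "document_ids": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Optional; restrict retrieval to specific documents.",
        },
    },
    "required": ["question"],
}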
@@ -597,7 +588,7 @@ async def call_tool(
     api_key: str,
     request: Any = None,
 ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-    if name == "ragflow_retrieval":
+    if name == "rag_retrieve":
         document_ids = arguments.get("document_ids", [])
         dataset_ids = arguments.get("dataset_ids", [])
         question = arguments.get("question", "")
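
Because the tool is now named rag_retrieve and returns markdown text rather than a JSON payload, a caller that previously parsed the result with json.loads needs to treat it as plain text. A minimal sketch of the consumer side, assuming the caller has already extracted the text of the first TextContent item (the sample string is invented):

# result_text stands for the text returned by the rag_retrieve tool; how you
# obtain it depends on your MCP client.
result_text = "**document_id**: doc-1\nhandbook.pdf:\nFirst matching passage.\n---"

# Split the markdown back into per-chunk sections on the "---" separator.
sections = [s.strip() for s in result_text.split("---") if s.strip()]
for section in sections:
    print(section)
    print("=" * 40)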

View File

@@ -87,7 +87,7 @@ os.environ["OPENAI_API_KEY"] = "your_api_key"
 # ============================================================
 # RAGFlow API configuration
-RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://100.77.70.35:1080")
+RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://host.docker.internal:1080")
 RAGFLOW_API_KEY = os.getenv("RAGFLOW_API_KEY", "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8")
 # File upload configuration
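
This default now also points at host.docker.internal, matching the MCP config change above. Because the module reads both values through os.getenv, deployments can still override them from the environment without editing the file; a minimal sketch (the override value is invented):

import os

# An explicitly set environment variable takes precedence over the getenv default,
# so the Docker-oriented default above can be overridden per deployment.
os.environ["RAGFLOW_API_URL"] = "http://localhost:1080"  # example override, invented value

RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://host.docker.internal:1080")
RAGFLOW_API_KEY = os.getenv("RAGFLOW_API_KEY", "your-ragflow-api-key")  # placeholder key

print(RAGFLOW_API_URL)  # prints the override, http://localhost:1080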