朱潮 2026-02-10 21:41:16 +08:00
parent 4e4052874a
commit 5f6e806b18
3 changed files with 17 additions and 26 deletions

View File

@@ -3,7 +3,7 @@
   "mcpServers": {
     "rag_retrieve": {
       "transport": "http",
-      "url": "http://100.77.70.35:9382/mcp/",
+      "url": "http://host.docker.internal:9382/mcp/",
       "headers": {
         "api_key": "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8",
         "X-Dataset-Ids": "{dataset_ids}"

View File

@@ -206,35 +206,26 @@ class RAGFlowConnector:
         res = res.json()
         if res.get("code") == 0:
             data = res["data"]
-            chunks = []
             # Cache document metadata and dataset information
             document_cache, dataset_cache = await self._get_document_metadata_cache(dataset_ids, api_key=api_key, force_refresh=force_refresh)
-            # Process chunks with enhanced field mapping including per-chunk metadata
+            # Build markdown response with only required fields
+            markdown_lines = []
             for chunk_data in data.get("chunks", []):
                 enhanced_chunk = self._map_chunk_fields(chunk_data, dataset_cache, document_cache)
-                chunks.append(enhanced_chunk)
+                document_name = enhanced_chunk.get("document_name", enhanced_chunk.get("document_keyword", ""))
+                content = enhanced_chunk.get("content", "")
+                document_id = enhanced_chunk.get("document_id", "")
-            # Build structured response (no longer need response-level document_metadata)
-            response = {
-                "chunks": chunks,
-                "pagination": {
-                    "page": data.get("page", page),
-                    "page_size": data.get("page_size", page_size),
-                    "total_chunks": data.get("total", len(chunks)),
-                    "total_pages": (data.get("total", len(chunks)) + page_size - 1) // page_size,
-                },
-                "query_info": {
-                    "question": question,
-                    "similarity_threshold": similarity_threshold,
-                    "vector_weight": vector_similarity_weight,
-                    "keyword_search": keyword,
-                    "dataset_count": len(dataset_ids),
-                },
-            }
+                markdown_lines.append(f"**document_id**: {document_id}")
+                markdown_lines.append(f"{document_name}:")
+                markdown_lines.append(f"{content}")
+                markdown_lines.append("---")
-            return [types.TextContent(type="text", text=json.dumps(response, ensure_ascii=False))]
+            markdown_output = "\n".join(markdown_lines)
+            return [types.TextContent(type="text", text=markdown_output)]
         raise Exception([types.TextContent(type="text", text=res.get("message"))])
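
The retrieval result is no longer wrapped in a structured JSON object with pagination and query_info; each enhanced chunk is flattened into a few markdown lines (document_id, document name, content) separated by "---". A standalone sketch of that formatting, with invented sample chunks, looks like this:

def chunks_to_markdown(chunks):
    """Format enhanced chunks the same way the new retrieval code does."""
    markdown_lines = []
    for chunk in chunks:
        document_name = chunk.get("document_name", chunk.get("document_keyword", ""))
        content = chunk.get("content", "")
        document_id = chunk.get("document_id", "")
        markdown_lines.append(f"**document_id**: {document_id}")
        markdown_lines.append(f"{document_name}:")
        markdown_lines.append(f"{content}")
        markdown_lines.append("---")
    return "\n".join(markdown_lines)

# Sample data is made up; only the field names come from the diff.
print(chunks_to_markdown([
    {"document_id": "doc-1", "document_name": "handbook.pdf", "content": "First matching passage."},
    {"document_id": "doc-2", "document_keyword": "faq.md", "content": "Second matching passage."},
]))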
@@ -518,12 +509,12 @@ def with_api_key(required: bool = True):
 @app.list_tools()
 @with_api_key(required=True)
-async def list_tools(*, connector: RAGFlowConnector, api_key: str) -> list[types.Tool]:
+async def list_tools(*, connector: RAGFlowConnector, api_key: str, request: Any = None) -> list[types.Tool]:
     dataset_description = await connector.list_datasets(api_key=api_key)
     return [
         types.Tool(
-            name="ragflow_retrieval",
+            name="rag_retrieve",
             description="Retrieve relevant chunks from the RAGFlow retrieve interface based on the question. You can optionally specify dataset_ids to search only specific datasets, or omit dataset_ids entirely to search across ALL available datasets. You can also optionally specify document_ids to search within specific documents. When dataset_ids is not provided or is empty, the system will automatically search across all available datasets. Below is the list of all available datasets, including their descriptions and IDs:"
             + dataset_description,
             inputSchema={
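
The hunk is cut off at inputSchema=. Based on the parameters the description mentions and the ones call_tool reads in the next hunk (question, dataset_ids, document_ids), a plausible schema might look like the sketch below; the property descriptions and the required list are assumptions.

# Hypothetical inputSchema for the renamed rag_retrieve tool; the real schema
# is not visible in this hunk, so treat every detail here as an assumption.
input_schema = {
    "type": "object",
    "properties": {
        "question": {"type": "string", "description": "Question to retrieve relevant chunks for."},
        "dataset_ids": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Optional; omit or leave empty to search all available datasets.",
        },
        "document_ids": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Optional; restrict retrieval to specific documents.",
        },
    },
    "required": ["question"],
}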
@@ -597,7 +588,7 @@ async def call_tool(
     api_key: str,
     request: Any = None,
 ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-    if name == "ragflow_retrieval":
+    if name == "rag_retrieve":
         document_ids = arguments.get("document_ids", [])
         dataset_ids = arguments.get("dataset_ids", [])
         question = arguments.get("question", "")
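
Because the tool is now named rag_retrieve and returns markdown text rather than a JSON payload, a caller that previously parsed the result with json.loads needs to treat it as plain text. A minimal sketch of the consumer side, assuming the caller has already extracted the text of the first TextContent item (the sample string is invented):

# result_text stands for the text returned by the rag_retrieve tool; how you
# obtain it depends on your MCP client.
result_text = "**document_id**: doc-1\nhandbook.pdf:\nFirst matching passage.\n---"

# Split the markdown back into per-chunk sections on the "---" separator.
sections = [s.strip() for s in result_text.split("---") if s.strip()]
for section in sections:
    print(section)
    print("=" * 40)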

View File

@@ -87,7 +87,7 @@ os.environ["OPENAI_API_KEY"] = "your_api_key"
 # ============================================================
 # RAGFlow API configuration
-RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://100.77.70.35:1080")
+RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://host.docker.internal:1080")
 RAGFLOW_API_KEY = os.getenv("RAGFLOW_API_KEY", "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8")
 # File upload configuration
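
This default now also points at host.docker.internal, matching the MCP config change above. Because the module reads both values through os.getenv, deployments can still override them from the environment without editing the file; a minimal sketch (the override value is invented):

import os

# An explicitly set environment variable takes precedence over the getenv default,
# so the Docker-oriented default above can be overridden per deployment.
os.environ["RAGFLOW_API_URL"] = "http://localhost:1080"  # example override, invented value

RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://host.docker.internal:1080")
RAGFLOW_API_KEY = os.getenv("RAGFLOW_API_KEY", "your-ragflow-api-key")  # placeholder key

print(RAGFLOW_API_URL)  # prints the override, http://localhost:1080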