rag_flow
This commit is contained in:
parent
4e4052874a
commit
5f6e806b18
@ -3,7 +3,7 @@
|
|||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"rag_retrieve": {
|
"rag_retrieve": {
|
||||||
"transport": "http",
|
"transport": "http",
|
||||||
"url": "http://100.77.70.35:9382/mcp/",
|
"url": "http://host.docker.internal:9382/mcp/",
|
||||||
"headers": {
|
"headers": {
|
||||||
"api_key": "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8",
|
"api_key": "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8",
|
||||||
"X-Dataset-Ids": "{dataset_ids}"
|
"X-Dataset-Ids": "{dataset_ids}"
|
||||||
|
|||||||
@ -206,35 +206,26 @@ class RAGFlowConnector:
|
|||||||
res = res.json()
|
res = res.json()
|
||||||
if res.get("code") == 0:
|
if res.get("code") == 0:
|
||||||
data = res["data"]
|
data = res["data"]
|
||||||
chunks = []
|
|
||||||
|
|
||||||
# Cache document metadata and dataset information
|
# Cache document metadata and dataset information
|
||||||
document_cache, dataset_cache = await self._get_document_metadata_cache(dataset_ids, api_key=api_key, force_refresh=force_refresh)
|
document_cache, dataset_cache = await self._get_document_metadata_cache(dataset_ids, api_key=api_key, force_refresh=force_refresh)
|
||||||
|
|
||||||
# Process chunks with enhanced field mapping including per-chunk metadata
|
# Build markdown response with only required fields
|
||||||
|
markdown_lines = []
|
||||||
for chunk_data in data.get("chunks", []):
|
for chunk_data in data.get("chunks", []):
|
||||||
enhanced_chunk = self._map_chunk_fields(chunk_data, dataset_cache, document_cache)
|
enhanced_chunk = self._map_chunk_fields(chunk_data, dataset_cache, document_cache)
|
||||||
chunks.append(enhanced_chunk)
|
document_name = enhanced_chunk.get("document_name", enhanced_chunk.get("document_keyword", ""))
|
||||||
|
content = enhanced_chunk.get("content", "")
|
||||||
|
document_id = enhanced_chunk.get("document_id", "")
|
||||||
|
|
||||||
# Build structured response (no longer need response-level document_metadata)
|
markdown_lines.append(f"**document_id**: {document_id}")
|
||||||
response = {
|
markdown_lines.append(f"{document_name}:")
|
||||||
"chunks": chunks,
|
markdown_lines.append(f"{content}")
|
||||||
"pagination": {
|
markdown_lines.append("---")
|
||||||
"page": data.get("page", page),
|
|
||||||
"page_size": data.get("page_size", page_size),
|
|
||||||
"total_chunks": data.get("total", len(chunks)),
|
|
||||||
"total_pages": (data.get("total", len(chunks)) + page_size - 1) // page_size,
|
|
||||||
},
|
|
||||||
"query_info": {
|
|
||||||
"question": question,
|
|
||||||
"similarity_threshold": similarity_threshold,
|
|
||||||
"vector_weight": vector_similarity_weight,
|
|
||||||
"keyword_search": keyword,
|
|
||||||
"dataset_count": len(dataset_ids),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
return [types.TextContent(type="text", text=json.dumps(response, ensure_ascii=False))]
|
markdown_output = "\n".join(markdown_lines)
|
||||||
|
|
||||||
|
return [types.TextContent(type="text", text=markdown_output)]
|
||||||
|
|
||||||
raise Exception([types.TextContent(type="text", text=res.get("message"))])
|
raise Exception([types.TextContent(type="text", text=res.get("message"))])
|
||||||
|
|
||||||
@ -518,12 +509,12 @@ def with_api_key(required: bool = True):
|
|||||||
|
|
||||||
@app.list_tools()
|
@app.list_tools()
|
||||||
@with_api_key(required=True)
|
@with_api_key(required=True)
|
||||||
async def list_tools(*, connector: RAGFlowConnector, api_key: str) -> list[types.Tool]:
|
async def list_tools(*, connector: RAGFlowConnector, api_key: str, request: Any = None) -> list[types.Tool]:
|
||||||
dataset_description = await connector.list_datasets(api_key=api_key)
|
dataset_description = await connector.list_datasets(api_key=api_key)
|
||||||
|
|
||||||
return [
|
return [
|
||||||
types.Tool(
|
types.Tool(
|
||||||
name="ragflow_retrieval",
|
name="rag_retrieve",
|
||||||
description="Retrieve relevant chunks from the RAGFlow retrieve interface based on the question. You can optionally specify dataset_ids to search only specific datasets, or omit dataset_ids entirely to search across ALL available datasets. You can also optionally specify document_ids to search within specific documents. When dataset_ids is not provided or is empty, the system will automatically search across all available datasets. Below is the list of all available datasets, including their descriptions and IDs:"
|
description="Retrieve relevant chunks from the RAGFlow retrieve interface based on the question. You can optionally specify dataset_ids to search only specific datasets, or omit dataset_ids entirely to search across ALL available datasets. You can also optionally specify document_ids to search within specific documents. When dataset_ids is not provided or is empty, the system will automatically search across all available datasets. Below is the list of all available datasets, including their descriptions and IDs:"
|
||||||
+ dataset_description,
|
+ dataset_description,
|
||||||
inputSchema={
|
inputSchema={
|
||||||
@ -597,7 +588,7 @@ async def call_tool(
|
|||||||
api_key: str,
|
api_key: str,
|
||||||
request: Any = None,
|
request: Any = None,
|
||||||
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
|
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
|
||||||
if name == "ragflow_retrieval":
|
if name == "rag_retrieve":
|
||||||
document_ids = arguments.get("document_ids", [])
|
document_ids = arguments.get("document_ids", [])
|
||||||
dataset_ids = arguments.get("dataset_ids", [])
|
dataset_ids = arguments.get("dataset_ids", [])
|
||||||
question = arguments.get("question", "")
|
question = arguments.get("question", "")
|
||||||
|
|||||||
@ -87,7 +87,7 @@ os.environ["OPENAI_API_KEY"] = "your_api_key"
|
|||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
# RAGFlow API 配置
|
# RAGFlow API 配置
|
||||||
RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://100.77.70.35:1080")
|
RAGFLOW_API_URL = os.getenv("RAGFLOW_API_URL", "http://host.docker.internal:1080")
|
||||||
RAGFLOW_API_KEY = os.getenv("RAGFLOW_API_KEY", "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8")
|
RAGFLOW_API_KEY = os.getenv("RAGFLOW_API_KEY", "ragflow-MRqxnDnYZ1yp5kklDMIlKH4f1qezvXIngSMGPhu1AG8")
|
||||||
|
|
||||||
# 文件上传配置
|
# 文件上传配置
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user