#!/usr/bin/env python3
"""
PrePrompt hook for the rag-retrieve-only skill.

Registered in ``.claude-plugin/plugin.json`` under ``hooks.PrePrompt``.
When run, it writes the contents of ``retrieval-policy.md`` (stored next to
this script) to stdout.
"""
import sys
from pathlib import Path


def main():
    """Print the retrieval policy file to stdout.

    The policy file is optional: when ``retrieval-policy.md`` is absent (or
    is not a regular file) nothing is printed.

    Returns:
        Always ``0``, used as the process exit status.
    """
    prompt_file = Path(__file__).parent / "retrieval-policy.md"
    # is_file() rather than exists(): a directory with this name would make
    # read_text() raise instead of being skipped.
    if prompt_file.is_file():
        print(prompt_file.read_text(encoding="utf-8"))
    return 0


if __name__ == '__main__':
    sys.exit(main())
+- Place citations immediately after the paragraph or bullet list that uses the knowledge. +- Do NOT collect citations at the end. +- Use 1-2 citations per paragraph or bullet list when possible. +- If learned knowledge is used, include at least 1 ``. diff --git a/skills/rag-retrieve-only/mcp_common.py b/skills/rag-retrieve-only/mcp_common.py new file mode 100644 index 0000000..5bf5935 --- /dev/null +++ b/skills/rag-retrieve-only/mcp_common.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +""" +MCP服务器通用工具函数 +提供路径处理、文件验证、请求处理等公共功能 +""" + +import json +import os +import sys +import asyncio +from typing import Any, Dict, List, Optional, Union +import re + +def get_allowed_directory(): + """获取允许访问的目录""" + # 优先使用命令行参数传入的dataset_dir + if len(sys.argv) > 1: + dataset_dir = sys.argv[1] + return os.path.abspath(dataset_dir) + + # 从环境变量读取项目数据目录 + project_dir = os.getenv("PROJECT_DATA_DIR", "./projects/data") + return os.path.abspath(project_dir) + + +def resolve_file_path(file_path: str, default_subfolder: str = "default") -> str: + """ + 解析文件路径,支持 folder/document.txt 和 document.txt 两种格式 + + Args: + file_path: 输入的文件路径 + default_subfolder: 当只传入文件名时使用的默认子文件夹名称 + + Returns: + 解析后的完整文件路径 + """ + # 如果路径包含文件夹分隔符,直接使用 + if '/' in file_path or '\\' in file_path: + clean_path = file_path.replace('\\', '/') + + # 移除 projects/ 前缀(如果存在) + if clean_path.startswith('projects/'): + clean_path = clean_path[9:] # 移除 'projects/' 前缀 + elif clean_path.startswith('./projects/'): + clean_path = clean_path[11:] # 移除 './projects/' 前缀 + else: + # 如果只有文件名,添加默认子文件夹 + clean_path = f"{default_subfolder}/{file_path}" + + # 获取允许的目录 + project_data_dir = get_allowed_directory() + + # 尝试在项目目录中查找文件 + full_path = os.path.join(project_data_dir, clean_path.lstrip('./')) + if os.path.exists(full_path): + return full_path + + # 如果直接路径不存在,尝试递归查找 + found = find_file_in_project(clean_path, project_data_dir) + if found: + return found + + # 如果是纯文件名且在default子文件夹中不存在,尝试在根目录查找 + if '/' not in file_path and '\\' not in 
file_path: + root_path = os.path.join(project_data_dir, file_path) + if os.path.exists(root_path): + return root_path + + raise FileNotFoundError(f"File not found: {file_path} (searched in {project_data_dir})") + + +def find_file_in_project(filename: str, project_dir: str) -> Optional[str]: + """在项目目录中递归查找文件""" + # 如果filename包含路径,只搜索指定的路径 + if '/' in filename: + parts = filename.split('/') + target_file = parts[-1] + search_dir = os.path.join(project_dir, *parts[:-1]) + + if os.path.exists(search_dir): + target_path = os.path.join(search_dir, target_file) + if os.path.exists(target_path): + return target_path + else: + # 纯文件名,递归搜索整个项目目录 + for root, dirs, files in os.walk(project_dir): + if filename in files: + return os.path.join(root, filename) + return None + + +def load_tools_from_json(tools_file_name: str) -> List[Dict[str, Any]]: + """从 JSON 文件加载工具定义""" + try: + tools_file = os.path.join(os.path.dirname(__file__), tools_file_name) + if os.path.exists(tools_file): + with open(tools_file, 'r', encoding='utf-8') as f: + return json.load(f) + else: + # 如果 JSON 文件不存在,使用默认定义 + return [] + except Exception as e: + print(f"Warning: Unable to load tool definition JSON file: {str(e)}") + return [] + + +def create_error_response(request_id: Any, code: int, message: str) -> Dict[str, Any]: + """创建标准化的错误响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "error": { + "code": code, + "message": message + } + } + + +def create_success_response(request_id: Any, result: Any) -> Dict[str, Any]: + """创建标准化的成功响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "result": result + } + + +def create_initialize_response(request_id: Any, server_name: str, server_version: str = "1.0.0") -> Dict[str, Any]: + """创建标准化的初始化响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {} + }, + "serverInfo": { + "name": server_name, + "version": server_version + } + } + } + + +def 
def create_ping_response(request_id: Any) -> Dict[str, Any]:
    """Build the JSON-RPC 2.0 response to a ``ping`` request."""
    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "result": {
            "pong": True
        }
    }


def create_tools_list_response(request_id: Any, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build the JSON-RPC 2.0 response to a ``tools/list`` request."""
    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "result": {
            "tools": tools
        }
    }


def is_regex_pattern(pattern: str) -> bool:
    """Heuristically decide whether ``pattern`` should be treated as a regex.

    A string counts as a regex when it is written as ``/body/``, as
    ``r"body"`` / ``r'body'``, or when it contains any regex metacharacter.
    Note that ``.`` is one of those metacharacters, so ordinary file names
    such as ``report.txt`` are classified as regexes too.
    """
    # /pattern/ form
    if pattern.startswith('/') and pattern.endswith('/') and len(pattern) > 2:
        return True

    # r"pattern" or r'pattern' form
    if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")) and len(pattern) > 3:
        return True

    # Any regex metacharacter anywhere in the string
    regex_chars = {'*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$', '\\', '.'}
    return any(char in pattern for char in regex_chars)


def compile_pattern(pattern: str) -> Union[re.Pattern, str, None]:
    """Compile ``pattern`` when it looks like a regex.

    Returns:
        * the original string when ``is_regex_pattern`` says it is plain text,
        * a compiled ``re.Pattern`` (with the ``/.../`` or ``r"..."`` wrapper
          removed) when it is a valid regex,
        * ``None`` when it looks like a regex but fails to compile.
    """
    if not is_regex_pattern(pattern):
        return pattern

    try:
        # /pattern/ form: compile what is between the slashes
        if pattern.startswith('/') and pattern.endswith('/'):
            return re.compile(pattern[1:-1])

        # r"pattern" or r'pattern' form: drop the r-prefix and both quotes
        if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")):
            return re.compile(pattern[2:-1])

        # Bare string containing regex metacharacters
        return re.compile(pattern)
    except re.error as e:
        # Warn on stderr: stdout carries the JSON-RPC stream in a stdio MCP
        # server, so diagnostics must not be written there.
        print(f"Warning: Regular expression '{pattern}' compilation failed: {e}", file=sys.stderr)
        return None


def _write_message(message: Any) -> None:
    """Serialize ``message`` as one JSON line on stdout and flush."""
    sys.stdout.write(json.dumps(message, ensure_ascii=False) + "\n")
    sys.stdout.flush()


def _jsonrpc_error(request_id: Any, code: int, message: str) -> Dict[str, Any]:
    """Build a JSON-RPC 2.0 error envelope; ``id`` is always present."""
    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "error": {
            "code": code,
            "message": message
        }
    }


async def handle_mcp_streaming(request_handler):
    """Run the line-delimited JSON-RPC loop over stdin/stdout.

    Each non-blank stdin line is parsed as JSON and passed to
    ``request_handler`` (an async callable taking the parsed request and
    returning the response object). The loop ends at EOF or on
    ``KeyboardInterrupt``.

    Error handling:
      * unparsable input -> error ``-32700`` with ``id: null``;
      * any exception raised while handling or serializing -> error
        ``-32603`` carrying the request's own ``id`` when it is known.

    Requests that are JSON objects without an ``id`` member are
    notifications: the handler is still invoked, but nothing is written back,
    as JSON-RPC 2.0 requires.
    """
    loop = asyncio.get_running_loop()
    try:
        while True:
            # readline() blocks, so run it on the default executor.
            line = await loop.run_in_executor(None, sys.stdin.readline)
            if not line:
                break  # EOF

            line = line.strip()
            if not line:
                continue

            try:
                request = json.loads(line)
            except json.JSONDecodeError:
                _write_message(_jsonrpc_error(None, -32700, "Parse error"))
                continue

            is_object = isinstance(request, dict)
            request_id = request.get("id") if is_object else None
            is_notification = is_object and "id" not in request

            try:
                response = await request_handler(request)
                if not is_notification:
                    _write_message(response)
            except Exception as e:
                # Top-level boundary: report the failure to the peer instead
                # of killing the server loop.
                if not is_notification:
                    _write_message(_jsonrpc_error(request_id, -32603, f"Internal error: {str(e)}"))

    except KeyboardInterrupt:
        pass
def _text_result(text: str, is_error: bool = False) -> Dict[str, Any]:
    """Wrap ``text`` in an MCP tool result with a single text content item.

    Failures additionally carry ``"isError": True`` so a client can tell a
    failed call from retrieved content without parsing the text.
    """
    result: Dict[str, Any] = {
        "content": [
            {
                "type": "text",
                "text": text
            }
        ]
    }
    if is_error:
        result["isError"] = True
    return result


def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
    """Call the RAG retrieval API for the bot given on the command line.

    The bot ID is read from ``sys.argv[1]``. The request is a POST to
    ``{BACKEND_HOST}/v1/rag_retrieve/{bot_id}`` with a bearer token equal to
    the MD5 hex digest of ``"{MASTERKEY}:{bot_id}"``.

    Args:
        query: Retrieval query text.
        top_k: Number of results requested from the API.

    Returns:
        An MCP tool result dict. On success its text is
        ``DOCUMENT_CITATION_INSTRUCTIONS`` followed by the API's ``markdown``
        field. On any failure its text starts with ``Error:`` and
        ``isError`` is set. This function does not raise.
    """
    bot_id = sys.argv[1] if len(sys.argv) > 1 else ""
    if not bot_id:
        # The URL itself is always non-empty, so the meaningful guard is on
        # the bot ID; without it the request would hit ".../rag_retrieve/".
        return _text_result(
            "Error: bot_id not provided. Please provide the bot ID as the first command line argument.",
            is_error=True,
        )

    try:
        url = f"{BACKEND_HOST}/v1/rag_retrieve/{bot_id}"

        # NOTE(review): MD5 is the token scheme this code sends; presumably
        # the backend expects exactly this — confirm before changing.
        auth_token = hashlib.md5(f"{MASTERKEY}:{bot_id}".encode()).hexdigest()

        headers = {
            "content-type": "application/json",
            "authorization": f"Bearer {auth_token}"
        }
        data = {
            "query": query,
            "top_k": top_k
        }

        response = requests.post(url, json=data, headers=headers, timeout=30)

        if response.status_code != 200:
            return _text_result(
                f"Error: RAG API returned status code {response.status_code}. Response: {response.text}",
                is_error=True,
            )

        try:
            response_data = response.json()
        except ValueError as e:
            # ValueError is the common base of the stdlib and simplejson
            # JSONDecodeError classes, whichever one requests raises.
            return _text_result(
                f"Error: Failed to parse API response as JSON. Error: {str(e)}, Raw response: {response.text}",
                is_error=True,
            )

        if isinstance(response_data, dict) and "markdown" in response_data:
            return _text_result(DOCUMENT_CITATION_INSTRUCTIONS + response_data["markdown"])

        return _text_result(
            f"Error: 'markdown' field not found in API response. Response: {json.dumps(response_data, indent=2, ensure_ascii=False)}",
            is_error=True,
        )

    except requests.exceptions.RequestException as e:
        return _text_result(f"Error: Failed to connect to RAG API. {str(e)}", is_error=True)
    except Exception as e:
        # Tool boundary: report every failure as a tool result, never raise.
        return _text_result(f"Error: {str(e)}", is_error=True)


async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch one MCP JSON-RPC request and return its response envelope.

    Supported methods: ``initialize``, ``ping``, ``tools/list`` and
    ``tools/call`` (tool ``rag_retrieve`` only). Unknown methods and tools
    yield error ``-32601``, invalid tool arguments ``-32602``, and any
    unexpected exception ``-32603``.
    """
    try:
        method = request.get("method")
        # "params" / "arguments" may be absent or explicitly null.
        params = request.get("params") or {}
        request_id = request.get("id")

        if method == "initialize":
            return create_initialize_response(request_id, "rag-retrieve")

        if method == "ping":
            return create_ping_response(request_id)

        if method == "tools/list":
            tools = load_tools_from_json("rag_retrieve_tools.json")
            if not tools:
                # Built-in fallback, kept in line with rag_retrieve_tools.json
                # (including the optional top_k argument).
                tools = [
                    {
                        "name": "rag_retrieve",
                        "description": "调用RAG检索API,根据查询内容检索相关文档。返回包含相关内容的markdown格式结果。",
                        "inputSchema": {
                            "type": "object",
                            "properties": {
                                "query": {
                                    "type": "string",
                                    "description": "检索查询内容"
                                },
                                "top_k": {
                                    "type": "integer",
                                    "description": "Number of top results to retrieve. Choose dynamically based on retrieval breadth and coverage needs.",
                                    "default": 100
                                }
                            },
                            "required": ["query"]
                        }
                    }
                ]
            return create_tools_list_response(request_id, tools)

        if method == "tools/call":
            tool_name = params.get("name")
            arguments = params.get("arguments") or {}

            if tool_name != "rag_retrieve":
                return create_error_response(request_id, -32601, f"Unknown tool: {tool_name}")

            query = arguments.get("query", "")
            if not isinstance(query, str) or not query.strip():
                return create_error_response(request_id, -32602, "Missing required parameter: query")

            # Accept integer-like values (e.g. "50") but reject anything that
            # cannot be a positive result count before calling the backend.
            try:
                top_k = int(arguments.get("top_k", 100))
            except (TypeError, ValueError):
                top_k = 0
            if top_k < 1:
                return create_error_response(request_id, -32602, "Invalid parameter: top_k must be a positive integer")

            return create_success_response(request_id, rag_retrieve(query, top_k))

        return create_error_response(request_id, -32601, f"Unknown method: {method}")

    except Exception as e:
        request_id = request.get("id") if isinstance(request, dict) else None
        return create_error_response(request_id, -32603, f"Internal error: {str(e)}")


async def main():
    """Main entry point: serve MCP requests over stdio until EOF."""
    await handle_mcp_streaming(handle_request)


if __name__ == "__main__":
    asyncio.run(main())
Choose dynamically based on retrieval breadth and coverage needs.", + "default": 100 + } + }, + "required": ["query"] + } + } +] diff --git a/skills_autoload/rag-retrieve/.claude-plugin/plugin.json b/skills_autoload/rag-retrieve/.claude-plugin/plugin.json index b57fdf3..925751c 100644 --- a/skills_autoload/rag-retrieve/.claude-plugin/plugin.json +++ b/skills_autoload/rag-retrieve/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "rag-retrieve", - "description": "rag-retrieve and table-rag-retrieve", + "description": "Provides RAG and table RAG retrieval tools through a PrePrompt hook and MCP server.", "hooks": { "PrePrompt": [ { diff --git a/skills_autoload/rag-retrieve/README.md b/skills_autoload/rag-retrieve/README.md index 9177f2a..acf13ac 100644 --- a/skills_autoload/rag-retrieve/README.md +++ b/skills_autoload/rag-retrieve/README.md @@ -1,153 +1,99 @@ -# User Context Loader +# RAG Retrieve -用户上下文加载器示例 Skill,演示 Claude Plugins 模式的 hooks 机制。 +An example autoload skill that demonstrates how to integrate `rag-retrieve` and `table-rag-retrieve` through Claude Plugins hooks and an MCP server. -## 功能说明 +## Overview -本 Skill 演示了三种 Hook 类型: +This skill uses a `PrePrompt` hook to inject retrieval guidance into the prompt, and starts an MCP server that exposes retrieval capabilities for the current bot. ### PrePrompt Hook -在 system_prompt 加载时执行,动态注入用户上下文信息。 -- 文件: `hooks/pre_prompt.py` -- 用途: 查询用户信息、偏好设置、历史记录等,注入到 prompt 中 +Runs when the system prompt is loaded and injects retrieval policy content. +- File: `hooks/pre_prompt.py` +- Purpose: load retrieval instructions and add them to the prompt context -### PostAgent Hook -在 agent 执行完成后执行,用于后处理。 -- 文件: `hooks/post_agent.py` -- 用途: 记录分析数据、触发异步任务、发送通知等 +### MCP Server +Provides retrieval tools over stdio for the current `bot_id`. 
+- File: `rag_retrieve_server.py` +- Purpose: expose `rag-retrieve` and related retrieval tools to the agent -### PreSave Hook -在消息保存前执行,用于内容处理。 -- 文件: `hooks/pre_save.py` -- 用途: 内容过滤、敏感信息脱敏、格式转换等 +## Directory Structure -## 目录结构 - -``` -user-context-loader/ -├── README.md # Skill 说明文档 +```text +rag-retrieve/ +├── README.md # Skill documentation ├── .claude-plugin/ -│ └── plugin.json # Hook 和 MCP 配置文件 -└── hooks/ - ├── pre_prompt.py # PrePrompt hook 脚本 - ├── post_agent.py # PostAgent hook 脚本 - └── pre_save.py # PreSave hook 脚本 +│ └── plugin.json # Hook and MCP server configuration +├── hooks/ +│ ├── pre_prompt.py # PrePrompt hook script +│ └── retrieval-policy.md # Retrieval policy injected into the prompt +├── mcp_common.py # Shared MCP utilities +├── rag_retrieve_server.py # MCP server entrypoint +└── rag_retrieve_tools.json # Tool definitions ``` -## plugin.json 格式 +## `plugin.json` Format ```json { - "name": "user-context-loader", - "description": "用户上下文加载器示例 Skill", + "name": "rag-retrieve", + "description": "rag-retrieve and table-rag-retrieve", "hooks": { "PrePrompt": [ { "type": "command", "command": "python hooks/pre_prompt.py" } - ], - "PostAgent": [ - { - "type": "command", - "command": "python hooks/post_agent.py" - } - ], - "PreSave": [ - { - "type": "command", - "command": "python hooks/pre_save.py" - } ] }, "mcpServers": { - "server-name": { - "command": "node", - "args": ["path/to/server.js"], - "env": { - "API_KEY": "${API_KEY}" - } + "rag_retrieve": { + "transport": "stdio", + "command": "python", + "args": [ + "./skills_autoload/rag-retrieve/rag_retrieve_server.py", + "{bot_id}" + ] } } } ``` -## Hook 脚本格式 +## Hook Script Behavior -Hook 脚本通过子进程执行,通过环境变量接收参数,通过 stdout 返回结果。 +The hook script runs as a subprocess, receives input through environment variables, and writes the injected content to stdout. 
-### 可用环境变量 +### Available Environment Variables -| 环境变量 | 说明 | 适用于 | -|---------|------|--------| -| `ASSISTANT_ID` | Bot ID | 所有 hook | -| `USER_IDENTIFIER` | 用户标识 | 所有 hook | -| `SESSION_ID` | 会话 ID | 所有 hook | -| `LANGUAGE` | 语言代码 | 所有 hook | -| `HOOK_TYPE` | Hook 类型 | 所有 hook | -| `CONTENT` | 消息内容 | PreSave | -| `ROLE` | 消息角色 | PreSave | -| `RESPONSE` | Agent 响应 | PostAgent | -| `METADATA` | 元数据 JSON | PostAgent | +| Environment Variable | Description | Applies To | +|----------------------|-------------|------------| +| `ASSISTANT_ID` | Bot ID | All hooks | +| `USER_IDENTIFIER` | User identifier | All hooks | +| `SESSION_ID` | Session ID | All hooks | +| `LANGUAGE` | Language code | All hooks | +| `HOOK_TYPE` | Hook type | All hooks | -### PrePrompt 示例 +### PrePrompt Example ```python #!/usr/bin/env python3 import os import sys + def main(): user_identifier = os.environ.get('USER_IDENTIFIER', '') bot_id = os.environ.get('ASSISTANT_ID', '') - # 输出要注入到 prompt 中的内容 - print(f"## User Context\n\n用户: {user_identifier}") + print(f"## Retrieval Context\n\nUser: {user_identifier}\nBot: {bot_id}") return 0 + if __name__ == '__main__': sys.exit(main()) ``` -### PreSave 示例 +## Example Use Cases -```python -#!/usr/bin/env python3 -import os -import sys - -def main(): - content = os.environ.get('CONTENT', '') - - # 处理内容并输出 - print(content) # 输出处理后的内容 - return 0 - -if __name__ == '__main__': - sys.exit(main()) -``` - -### PostAgent 示例 - -```python -#!/usr/bin/env python3 -import os -import sys - -def main(): - response = os.environ.get('RESPONSE', '') - session_id = os.environ.get('SESSION_ID', '') - - # 记录日志(输出到 stderr) - print(f"Session {session_id}: Response length {len(response)}", file=sys.stderr) - return 0 - -if __name__ == '__main__': - sys.exit(main()) -``` - -## 使用场景 - -1. **PrePrompt**: 用户登录时自动加载其偏好设置、历史订单等 -2. **PostAgent**: 记录对话分析数据,触发后续业务流程 -3. **PreSave**: 敏感信息脱敏后再存储,如手机号、邮箱等 +1. 
**Prompt-time retrieval guidance**: inject retrieval rules before the model starts reasoning +2. **Bot-specific retrieval setup**: start the MCP server with the current `bot_id` +3. **Unified retrieval access**: expose RAG and table RAG tools through a single skill diff --git a/skills_developing/rag-retrieve/Retrieval_Policy.md b/skills_developing/rag-retrieve-cli/Retrieval_Policy.md similarity index 100% rename from skills_developing/rag-retrieve/Retrieval_Policy.md rename to skills_developing/rag-retrieve-cli/Retrieval_Policy.md diff --git a/skills_developing/rag-retrieve/SKILL.md b/skills_developing/rag-retrieve-cli/SKILL.md similarity index 100% rename from skills_developing/rag-retrieve/SKILL.md rename to skills_developing/rag-retrieve-cli/SKILL.md diff --git a/skills_developing/rag-retrieve/scripts/rag_retrieve.py b/skills_developing/rag-retrieve-cli/scripts/rag_retrieve.py similarity index 100% rename from skills_developing/rag-retrieve/scripts/rag_retrieve.py rename to skills_developing/rag-retrieve-cli/scripts/rag_retrieve.py diff --git a/skills_developing/rag-retrieve/skill.yaml b/skills_developing/rag-retrieve-cli/skill.yaml similarity index 100% rename from skills_developing/rag-retrieve/skill.yaml rename to skills_developing/rag-retrieve-cli/skill.yaml diff --git a/utils/multi_project_manager.py b/utils/multi_project_manager.py index a5bca91..5ca31bd 100644 --- a/utils/multi_project_manager.py +++ b/utils/multi_project_manager.py @@ -7,6 +7,7 @@ import os import shutil import json import logging +import re from pathlib import Path from typing import List, Dict, Optional from datetime import datetime @@ -321,7 +322,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo """ skills = list(skills or []) - has_rag_retrieve = any(Path(skill.lstrip("@")).name == "rag-retrieve" for skill in skills) + has_rag_retrieve = any(re.search(r"rag-retrieve", skill) for skill in skills) if dataset_ids and not has_rag_retrieve: 
skills.append("@skills_autoload/rag-retrieve") logger.info("Auto loaded skill '@skills_autoload/rag-retrieve' because dataset_ids is not empty")