From 53fb98e44eac11871fdf5143f0413c96e517d8f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 16 Apr 2026 17:55:34 +0800 Subject: [PATCH 1/2] Retrieval Policy --- prompt/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prompt/system_prompt.md b/prompt/system_prompt.md index 6607564..c0c8be9 100644 --- a/prompt/system_prompt.md +++ b/prompt/system_prompt.md @@ -69,9 +69,9 @@ When creating scripts in `executable_code/`, follow these organization rules: - Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py` - Downloaded file: `{agent_dir_path}/download/report.pdf` -# Retrieval Policy (Priority & Fallback) +# Retrieval Policy -### 1. Retrieval Source Priority and Tool Selection +### 1. Retrieval Order and Tool Selection - Follow this section for source choice, tool choice, query rewrite, `top_k`, fallback, result handling, and citations. - Use this default retrieval order and execute it sequentially: skill-enabled knowledge retrieval tools > `rag_retrieve` / `table_rag_retrieve` > local filesystem retrieval. - Do NOT answer from model knowledge first. 
From e1bf685314f752243ca709150bdf529f07436ae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 16 Apr 2026 19:38:13 +0800 Subject: [PATCH 2/2] add rag_retrieve autoload --- mcp/mcp_settings.json | 11 +- prompt/system_prompt.md | 54 ---- .../rag-retrieve/.claude-plugin/plugin.json | 22 ++ skills_autoload/rag-retrieve/README.md | 153 +++++++++++ .../rag-retrieve/hooks/pre_prompt.py | 20 ++ .../rag-retrieve/hooks/retrieval-policy.md | 53 ++++ skills_autoload/rag-retrieve/mcp_common.py | 251 ++++++++++++++++++ .../rag-retrieve}/rag_retrieve_server.py | 0 .../rag-retrieve}/rag_retrieve_tools.json | 0 utils/multi_project_manager.py | 38 ++- 10 files changed, 525 insertions(+), 77 deletions(-) create mode 100644 skills_autoload/rag-retrieve/.claude-plugin/plugin.json create mode 100644 skills_autoload/rag-retrieve/README.md create mode 100644 skills_autoload/rag-retrieve/hooks/pre_prompt.py create mode 100644 skills_autoload/rag-retrieve/hooks/retrieval-policy.md create mode 100644 skills_autoload/rag-retrieve/mcp_common.py rename {mcp => skills_autoload/rag-retrieve}/rag_retrieve_server.py (100%) rename {mcp/tools => skills_autoload/rag-retrieve}/rag_retrieve_tools.json (100%) diff --git a/mcp/mcp_settings.json b/mcp/mcp_settings.json index ddf9962..3aa61ae 100644 --- a/mcp/mcp_settings.json +++ b/mcp/mcp_settings.json @@ -1,14 +1,5 @@ [ { - "mcpServers": { - "rag_retrieve": { - "transport": "stdio", - "command": "python", - "args": [ - "./mcp/rag_retrieve_server.py", - "{bot_id}" - ] - } - } + "mcpServers": {} } ] diff --git a/prompt/system_prompt.md b/prompt/system_prompt.md index c0c8be9..7888edf 100644 --- a/prompt/system_prompt.md +++ b/prompt/system_prompt.md @@ -69,60 +69,6 @@ When creating scripts in `executable_code/`, follow these organization rules: - Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py` - Downloaded file: `{agent_dir_path}/download/report.pdf` -# Retrieval Policy - -### 1. 
Retrieval Order and Tool Selection -- Follow this section for source choice, tool choice, query rewrite, `top_k`, fallback, result handling, and citations. -- Use this default retrieval order and execute it sequentially: skill-enabled knowledge retrieval tools > `rag_retrieve` / `table_rag_retrieve` > local filesystem retrieval. -- Do NOT answer from model knowledge first. -- Do NOT skip directly to local filesystem retrieval when an earlier retrieval source may answer the question. -- When a suitable skill-enabled knowledge retrieval tool is available, use it first. -- If no suitable skill-enabled retrieval tool is available, or if its result is insufficient, continue with `rag_retrieve` or `table_rag_retrieve`. -- Use `table_rag_retrieve` first for values, prices, quantities, inventory, specifications, rankings, comparisons, summaries, extraction, lists, tables, name lookup, historical coverage, mixed questions, and unclear cases. -- Use `rag_retrieve` first only for clearly pure concept, definition, workflow, policy, or explanation questions without structured data needs. -- After each retrieval step, evaluate sufficiency before moving to the next source. Do NOT run these retrieval sources in parallel. - -### 2. Query Preparation -- Do NOT pass the raw user question unless it already works well for retrieval. -- Rewrite for recall: extract entity, time scope, attributes, and intent. -- Add useful variants: synonyms, aliases, abbreviations, related titles, historical names, and category terms. -- Expand list-style, extraction, overview, historical, roster, timeline, and archive queries more aggressively. -- Preserve meaning. Do NOT introduce unrelated topics. - -### 3. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use the smallest sufficient value, then expand only if coverage is insufficient. -- Use `30` for simple fact lookup. -- Use `50` for moderate synthesis, comparison, summarization, or disambiguation. 
-- Use `100` for broad recall, such as comprehensive analysis, scattered knowledge, multiple entities or periods, or list / catalog / timeline / roster / overview requests. -- Raise `top_k` when keyword branches are many or results are too few, repetitive, incomplete, sparse, or too narrow. -- Use this expansion order: `30 -> 50 -> 100`. If unsure, use `100`. - -### 4. Result Evaluation -- Treat results as insufficient if they are empty, start with `Error:`, say `no excel files found`, are off-topic, miss the core entity or scope, or provide no usable evidence. -- Also treat results as insufficient when they cover only part of the request, or when full-list, historical, comparison, or mixed data + explanation requests return only partial or truncated coverage. - -### 5. Fallback and Sequential Retry -- If the first retrieval result is insufficient, call the next retrieval source in the default order before replying. -- If the first RAG tool is insufficient, call the other RAG tool next before moving to local filesystem retrieval. -- If `table_rag_retrieve` is insufficient or empty, continue with `rag_retrieve`. -- If `rag_retrieve` is insufficient or empty, continue with `table_rag_retrieve`. -- If both `rag_retrieve` and `table_rag_retrieve` are insufficient, continue with local filesystem retrieval. -- Say no relevant information was found only after all applicable skill-enabled retrieval tools, both `rag_retrieve` and `table_rag_retrieve`, and local filesystem retrieval have been tried and still do not provide enough evidence. -- Do NOT reply that no relevant information was found before the final local filesystem fallback has also been tried. - -### 6. Table RAG Result Handling -- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` content in `table_rag_retrieve` results. -- If results are truncated, explicitly tell the user total matches (`N+M`), displayed count (`N`), and omitted count (`M`). -- Cite data sources using filenames from `file_ref_table`. 
- -### 7. Citation Requirements for Retrieved Knowledge -- When using knowledge from `rag_retrieve` or `table_rag_retrieve`, you MUST generate `` tags. -- Follow the citation format returned by each tool. -- Place citations immediately after the paragraph or bullet list that uses the knowledge. -- Do NOT collect citations at the end. -- Use 1-2 citations per paragraph or bullet list when possible. -- If learned knowledge is used, include at least 1 ``. - # System Information Working directory: {agent_dir_path} diff --git a/skills_autoload/rag-retrieve/.claude-plugin/plugin.json b/skills_autoload/rag-retrieve/.claude-plugin/plugin.json new file mode 100644 index 0000000..b57fdf3 --- /dev/null +++ b/skills_autoload/rag-retrieve/.claude-plugin/plugin.json @@ -0,0 +1,22 @@ +{ + "name": "rag-retrieve", + "description": "rag-retrieve and table-rag-retrieve", + "hooks": { + "PrePrompt": [ + { + "type": "command", + "command": "python hooks/pre_prompt.py" + } + ] + }, + "mcpServers": { + "rag_retrieve": { + "transport": "stdio", + "command": "python", + "args": [ + "./skills_autoload/rag-retrieve/rag_retrieve_server.py", + "{bot_id}" + ] + } + } +} diff --git a/skills_autoload/rag-retrieve/README.md b/skills_autoload/rag-retrieve/README.md new file mode 100644 index 0000000..9177f2a --- /dev/null +++ b/skills_autoload/rag-retrieve/README.md @@ -0,0 +1,153 @@ +# User Context Loader + +用户上下文加载器示例 Skill,演示 Claude Plugins 模式的 hooks 机制。 + +## 功能说明 + +本 Skill 演示了三种 Hook 类型: + +### PrePrompt Hook +在 system_prompt 加载时执行,动态注入用户上下文信息。 +- 文件: `hooks/pre_prompt.py` +- 用途: 查询用户信息、偏好设置、历史记录等,注入到 prompt 中 + +### PostAgent Hook +在 agent 执行完成后执行,用于后处理。 +- 文件: `hooks/post_agent.py` +- 用途: 记录分析数据、触发异步任务、发送通知等 + +### PreSave Hook +在消息保存前执行,用于内容处理。 +- 文件: `hooks/pre_save.py` +- 用途: 内容过滤、敏感信息脱敏、格式转换等 + +## 目录结构 + +``` +user-context-loader/ +├── README.md # Skill 说明文档 +├── .claude-plugin/ +│ └── plugin.json # Hook 和 MCP 配置文件 +└── hooks/ + ├── pre_prompt.py # PrePrompt hook 脚本 + ├── post_agent.py 
# PostAgent hook 脚本 + └── pre_save.py # PreSave hook 脚本 +``` + +## plugin.json 格式 + +```json +{ + "name": "user-context-loader", + "description": "用户上下文加载器示例 Skill", + "hooks": { + "PrePrompt": [ + { + "type": "command", + "command": "python hooks/pre_prompt.py" + } + ], + "PostAgent": [ + { + "type": "command", + "command": "python hooks/post_agent.py" + } + ], + "PreSave": [ + { + "type": "command", + "command": "python hooks/pre_save.py" + } + ] + }, + "mcpServers": { + "server-name": { + "command": "node", + "args": ["path/to/server.js"], + "env": { + "API_KEY": "${API_KEY}" + } + } + } +} +``` + +## Hook 脚本格式 + +Hook 脚本通过子进程执行,通过环境变量接收参数,通过 stdout 返回结果。 + +### 可用环境变量 + +| 环境变量 | 说明 | 适用于 | +|---------|------|--------| +| `ASSISTANT_ID` | Bot ID | 所有 hook | +| `USER_IDENTIFIER` | 用户标识 | 所有 hook | +| `SESSION_ID` | 会话 ID | 所有 hook | +| `LANGUAGE` | 语言代码 | 所有 hook | +| `HOOK_TYPE` | Hook 类型 | 所有 hook | +| `CONTENT` | 消息内容 | PreSave | +| `ROLE` | 消息角色 | PreSave | +| `RESPONSE` | Agent 响应 | PostAgent | +| `METADATA` | 元数据 JSON | PostAgent | + +### PrePrompt 示例 + +```python +#!/usr/bin/env python3 +import os +import sys + +def main(): + user_identifier = os.environ.get('USER_IDENTIFIER', '') + bot_id = os.environ.get('ASSISTANT_ID', '') + + # 输出要注入到 prompt 中的内容 + print(f"## User Context\n\n用户: {user_identifier}") + return 0 + +if __name__ == '__main__': + sys.exit(main()) +``` + +### PreSave 示例 + +```python +#!/usr/bin/env python3 +import os +import sys + +def main(): + content = os.environ.get('CONTENT', '') + + # 处理内容并输出 + print(content) # 输出处理后的内容 + return 0 + +if __name__ == '__main__': + sys.exit(main()) +``` + +### PostAgent 示例 + +```python +#!/usr/bin/env python3 +import os +import sys + +def main(): + response = os.environ.get('RESPONSE', '') + session_id = os.environ.get('SESSION_ID', '') + + # 记录日志(输出到 stderr) + print(f"Session {session_id}: Response length {len(response)}", file=sys.stderr) + return 0 + +if __name__ == '__main__': + sys.exit(main()) +``` 
+ +## 使用场景 + +1. **PrePrompt**: 用户登录时自动加载其偏好设置、历史订单等 +2. **PostAgent**: 记录对话分析数据,触发后续业务流程 +3. **PreSave**: 敏感信息脱敏后再存储,如手机号、邮箱等 diff --git a/skills_autoload/rag-retrieve/hooks/pre_prompt.py b/skills_autoload/rag-retrieve/hooks/pre_prompt.py new file mode 100644 index 0000000..11f445d --- /dev/null +++ b/skills_autoload/rag-retrieve/hooks/pre_prompt.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +""" +PrePrompt Hook - retrieval policy injector + +Runs when the system prompt is loaded; reads retrieval-policy.md from the +same directory and prints it so it is injected into the system prompt. +""" +import sys +from pathlib import Path + + +def main(): + prompt_file = Path(__file__).parent / "retrieval-policy.md" + if prompt_file.exists(): + print(prompt_file.read_text(encoding="utf-8")) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/skills_autoload/rag-retrieve/hooks/retrieval-policy.md b/skills_autoload/rag-retrieve/hooks/retrieval-policy.md new file mode 100644 index 0000000..6527185 --- /dev/null +++ b/skills_autoload/rag-retrieve/hooks/retrieval-policy.md @@ -0,0 +1,53 @@ +# Retrieval Policy + +### 1. Retrieval Order and Tool Selection +- Follow this section for source choice, tool choice, query rewrite, `top_k`, fallback, result handling, and citations. +- Use this default retrieval order and execute it sequentially: skill-enabled knowledge retrieval tools > `rag_retrieve` / `table_rag_retrieve` > local filesystem retrieval. +- Do NOT answer from model knowledge first. +- Do NOT skip directly to local filesystem retrieval when an earlier retrieval source may answer the question. +- When a suitable skill-enabled knowledge retrieval tool is available, use it first. +- If no suitable skill-enabled retrieval tool is available, or if its result is insufficient, continue with `rag_retrieve` or `table_rag_retrieve`. 
+- Use `table_rag_retrieve` first for values, prices, quantities, inventory, specifications, rankings, comparisons, summaries, extraction, lists, tables, name lookup, historical coverage, mixed questions, and unclear cases. +- Use `rag_retrieve` first only for clearly pure concept, definition, workflow, policy, or explanation questions without structured data needs. +- After each retrieval step, evaluate sufficiency before moving to the next source. Do NOT run these retrieval sources in parallel. + +### 2. Query Preparation +- Do NOT pass the raw user question unless it already works well for retrieval. +- Rewrite for recall: extract entity, time scope, attributes, and intent. +- Add useful variants: synonyms, aliases, abbreviations, related titles, historical names, and category terms. +- Expand list-style, extraction, overview, historical, roster, timeline, and archive queries more aggressively. +- Preserve meaning. Do NOT introduce unrelated topics. + +### 3. Retrieval Breadth (`top_k`) +- Apply `top_k` only to `rag_retrieve`. Use the smallest sufficient value, then expand only if coverage is insufficient. +- Use `30` for simple fact lookup. +- Use `50` for moderate synthesis, comparison, summarization, or disambiguation. +- Use `100` for broad recall, such as comprehensive analysis, scattered knowledge, multiple entities or periods, or list / catalog / timeline / roster / overview requests. +- Raise `top_k` when keyword branches are many or results are too few, repetitive, incomplete, sparse, or too narrow. +- Use this expansion order: `30 -> 50 -> 100`. If unsure, use `100`. + +### 4. Result Evaluation +- Treat results as insufficient if they are empty, start with `Error:`, say `no excel files found`, are off-topic, miss the core entity or scope, or provide no usable evidence. 
+- Also treat results as insufficient when they cover only part of the request, or when full-list, historical, comparison, or mixed data + explanation requests return only partial or truncated coverage. + +### 5. Fallback and Sequential Retry +- If the first retrieval result is insufficient, call the next retrieval source in the default order before replying. +- If the first RAG tool is insufficient, call the other RAG tool next before moving to local filesystem retrieval. +- If `table_rag_retrieve` is insufficient or empty, continue with `rag_retrieve`. +- If `rag_retrieve` is insufficient or empty, continue with `table_rag_retrieve`. +- If both `rag_retrieve` and `table_rag_retrieve` are insufficient, continue with local filesystem retrieval. +- Say no relevant information was found only after all applicable skill-enabled retrieval tools, both `rag_retrieve` and `table_rag_retrieve`, and local filesystem retrieval have been tried and still do not provide enough evidence. +- Do NOT reply that no relevant information was found before the final local filesystem fallback has also been tried. + +### 6. Table RAG Result Handling +- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` content in `table_rag_retrieve` results. +- If results are truncated, explicitly tell the user total matches (`N+M`), displayed count (`N`), and omitted count (`M`). +- Cite data sources using filenames from `file_ref_table`. + +### 7. Citation Requirements for Retrieved Knowledge +- When using knowledge from `rag_retrieve` or `table_rag_retrieve`, you MUST generate `` tags. +- Follow the citation format returned by each tool. +- Place citations immediately after the paragraph or bullet list that uses the knowledge. +- Do NOT collect citations at the end. +- Use 1-2 citations per paragraph or bullet list when possible. +- If learned knowledge is used, include at least 1 ``. 
diff --git a/skills_autoload/rag-retrieve/mcp_common.py b/skills_autoload/rag-retrieve/mcp_common.py new file mode 100644 index 0000000..5bf5935 --- /dev/null +++ b/skills_autoload/rag-retrieve/mcp_common.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +""" +MCP服务器通用工具函数 +提供路径处理、文件验证、请求处理等公共功能 +""" + +import json +import os +import sys +import asyncio +from typing import Any, Dict, List, Optional, Union +import re + +def get_allowed_directory(): + """获取允许访问的目录""" + # 优先使用命令行参数传入的dataset_dir + if len(sys.argv) > 1: + dataset_dir = sys.argv[1] + return os.path.abspath(dataset_dir) + + # 从环境变量读取项目数据目录 + project_dir = os.getenv("PROJECT_DATA_DIR", "./projects/data") + return os.path.abspath(project_dir) + + +def resolve_file_path(file_path: str, default_subfolder: str = "default") -> str: + """ + 解析文件路径,支持 folder/document.txt 和 document.txt 两种格式 + + Args: + file_path: 输入的文件路径 + default_subfolder: 当只传入文件名时使用的默认子文件夹名称 + + Returns: + 解析后的完整文件路径 + """ + # 如果路径包含文件夹分隔符,直接使用 + if '/' in file_path or '\\' in file_path: + clean_path = file_path.replace('\\', '/') + + # 移除 projects/ 前缀(如果存在) + if clean_path.startswith('projects/'): + clean_path = clean_path[9:] # 移除 'projects/' 前缀 + elif clean_path.startswith('./projects/'): + clean_path = clean_path[11:] # 移除 './projects/' 前缀 + else: + # 如果只有文件名,添加默认子文件夹 + clean_path = f"{default_subfolder}/{file_path}" + + # 获取允许的目录 + project_data_dir = get_allowed_directory() + + # 尝试在项目目录中查找文件 + full_path = os.path.join(project_data_dir, clean_path.lstrip('./')) + if os.path.exists(full_path): + return full_path + + # 如果直接路径不存在,尝试递归查找 + found = find_file_in_project(clean_path, project_data_dir) + if found: + return found + + # 如果是纯文件名且在default子文件夹中不存在,尝试在根目录查找 + if '/' not in file_path and '\\' not in file_path: + root_path = os.path.join(project_data_dir, file_path) + if os.path.exists(root_path): + return root_path + + raise FileNotFoundError(f"File not found: {file_path} (searched in {project_data_dir})") + + +def 
find_file_in_project(filename: str, project_dir: str) -> Optional[str]: + """在项目目录中递归查找文件""" + # 如果filename包含路径,只搜索指定的路径 + if '/' in filename: + parts = filename.split('/') + target_file = parts[-1] + search_dir = os.path.join(project_dir, *parts[:-1]) + + if os.path.exists(search_dir): + target_path = os.path.join(search_dir, target_file) + if os.path.exists(target_path): + return target_path + else: + # 纯文件名,递归搜索整个项目目录 + for root, dirs, files in os.walk(project_dir): + if filename in files: + return os.path.join(root, filename) + return None + + +def load_tools_from_json(tools_file_name: str) -> List[Dict[str, Any]]: + """从 JSON 文件加载工具定义""" + try: + tools_file = os.path.join(os.path.dirname(__file__), tools_file_name) + if os.path.exists(tools_file): + with open(tools_file, 'r', encoding='utf-8') as f: + return json.load(f) + else: + # 如果 JSON 文件不存在,使用默认定义 + return [] + except Exception as e: + # Warn on stderr: stdout carries the JSON-RPC stream (see handle_mcp_streaming) + print(f"Warning: Unable to load tool definition JSON file: {str(e)}", file=sys.stderr) + return [] + + +def create_error_response(request_id: Any, code: int, message: str) -> Dict[str, Any]: + """创建标准化的错误响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "error": { + "code": code, + "message": message + } + } + + +def create_success_response(request_id: Any, result: Any) -> Dict[str, Any]: + """创建标准化的成功响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "result": result + } + + +def create_initialize_response(request_id: Any, server_name: str, server_version: str = "1.0.0") -> Dict[str, Any]: + """创建标准化的初始化响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {} + }, + "serverInfo": { + "name": server_name, + "version": server_version + } + } + } + + +def create_ping_response(request_id: Any) -> Dict[str, Any]: + """创建标准化的ping响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "pong": True + } + } + + +def create_tools_list_response(request_id: Any, tools: List[Dict[str, 
Any]]) -> Dict[str, Any]: + """创建标准化的工具列表响应""" + return { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "tools": tools + } + } + + +def is_regex_pattern(pattern: str) -> bool: + """检测字符串是否为正则表达式模式""" + # 检查 /pattern/ 格式 + if pattern.startswith('/') and pattern.endswith('/') and len(pattern) > 2: + return True + + # 检查 r"pattern" 或 r'pattern' 格式 + if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")) and len(pattern) > 3: + return True + + # 检查是否包含正则特殊字符 + regex_chars = {'*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$', '\\', '.'} + return any(char in pattern for char in regex_chars) + + +def compile_pattern(pattern: str) -> Union[re.Pattern, str, None]: + """编译正则表达式模式,如果不是正则则返回原字符串""" + if not is_regex_pattern(pattern): + return pattern + + try: + # 处理 /pattern/ 格式 + if pattern.startswith('/') and pattern.endswith('/'): + regex_body = pattern[1:-1] + return re.compile(regex_body) + + # 处理 r"pattern" 或 r'pattern' 格式 + if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")): + regex_body = pattern[2:-1] + return re.compile(regex_body) + + # 直接编译包含正则字符的字符串 + return re.compile(pattern) + except re.error as e: + # 如果编译失败,返回None表示无效的正则 + # Warn on stderr: stdout carries the JSON-RPC stream (see handle_mcp_streaming) + print(f"Warning: Regular expression '{pattern}' compilation failed: {e}", file=sys.stderr) + return None + + +async def handle_mcp_streaming(request_handler): + """处理MCP请求的标准主循环""" + try: + while True: + # Read from stdin + line = await asyncio.get_event_loop().run_in_executor(None, sys.stdin.readline) + if not line: + break + + line = line.strip() + if not line: + continue + + try: + request = json.loads(line) + response = await request_handler(request) + + # Write to stdout + sys.stdout.write(json.dumps(response, ensure_ascii=False) + "\n") + sys.stdout.flush() + + except json.JSONDecodeError: + error_response = { + "jsonrpc": "2.0", + "error": { + "code": -32700, + "message": "Parse error" + } + } + sys.stdout.write(json.dumps(error_response, ensure_ascii=False) + "\n") + sys.stdout.flush() + + except 
Exception as e: + error_response = { + "jsonrpc": "2.0", + "error": { + "code": -32603, + "message": f"Internal error: {str(e)}" + } + } + sys.stdout.write(json.dumps(error_response, ensure_ascii=False) + "\n") + sys.stdout.flush() + + except KeyboardInterrupt: + pass diff --git a/mcp/rag_retrieve_server.py b/skills_autoload/rag-retrieve/rag_retrieve_server.py similarity index 100% rename from mcp/rag_retrieve_server.py rename to skills_autoload/rag-retrieve/rag_retrieve_server.py diff --git a/mcp/tools/rag_retrieve_tools.json b/skills_autoload/rag-retrieve/rag_retrieve_tools.json similarity index 100% rename from mcp/tools/rag_retrieve_tools.json rename to skills_autoload/rag-retrieve/rag_retrieve_tools.json diff --git a/utils/multi_project_manager.py b/utils/multi_project_manager.py index be29923..a5bca91 100644 --- a/utils/multi_project_manager.py +++ b/utils/multi_project_manager.py @@ -320,6 +320,12 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo str: 机器人项目目录路径 """ + skills = list(skills or []) + has_rag_retrieve = any(Path(skill.lstrip("@")).name == "rag-retrieve" for skill in skills) + if dataset_ids and not has_rag_retrieve: + skills.append("@skills_autoload/rag-retrieve") + logger.info("Auto loaded skill '@skills_autoload/rag-retrieve' because dataset_ids is not empty") + logger.info(f"Ensuring robot project exists: {bot_id}, skills: {skills}") # 创建机器人目录结构(如果不存在) @@ -375,27 +381,27 @@ def _extract_skills_to_robot(bot_id: str, skills: List[str], project_path: Path) - 如果是简单名称(如 "rag-retrieve"),从以下目录按优先级顺序查找: 1. projects/uploads/{bot_id}/skills/ 2. 
skills/ + - 如果是以 @ 开头的仓库相对路径(如 "@skills_autoload/rag-retrieve"),则从仓库根目录直接解析 搜索目录优先级:先搜索 projects/uploads/{bot_id}/skills/,再搜索 skills/ Args: bot_id: 机器人 ID - skills: 技能文件名列表(如 ["rag-retrieve", "projects/uploads/{bot_id}/skills/rag-retrieve"]) + skills: 技能文件名列表(如 ["rag-retrieve", "@skills_autoload/rag-retrieve", "projects/uploads/{bot_id}/skills/rag-retrieve"]) project_path: 项目路径 """ - import zipfile - # skills 源目录(按优先级顺序) + repo_root = Path(__file__).resolve().parent.parent skills_source_dirs = [ project_path / "uploads" / bot_id / "skills", - Path("skills"), + repo_root / "skills", ] skills_target_dir = project_path / "robot" / bot_id / "skills" skills_target_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Copying skills to {skills_target_dir}") # 清理不在列表中的多余 skill 文件夹 - expected_skill_names = {os.path.basename(skill) for skill in skills} + expected_skill_names = {Path(skill.lstrip("@")).name for skill in skills} if skills_target_dir.exists(): for item in skills_target_dir.iterdir(): if item.is_dir() and item.name not in expected_skill_names: @@ -403,7 +409,8 @@ def _extract_skills_to_robot(bot_id: str, skills: List[str], project_path: Path) shutil.rmtree(item) for skill in skills: - target_dir = skills_target_dir / os.path.basename(skill) + skill_name = Path(skill.lstrip("@")).name + target_dir = skills_target_dir / skill_name # 如果目标目录已存在,跳过复制 if target_dir.exists(): @@ -412,20 +419,25 @@ def _extract_skills_to_robot(bot_id: str, skills: List[str], project_path: Path) source_dir = None - # 简单名称:按优先级顺序在多个目录中查找 - for base_dir in skills_source_dirs: - candidate_dir = base_dir / skill + if skill.startswith("@"): + candidate_dir = repo_root / skill.lstrip("@") if candidate_dir.exists(): source_dir = candidate_dir - logger.info(f" Found skill '{skill}' in {base_dir}") - break + logger.info(f" Found skill '{skill}' at {candidate_dir}") + + # 简单名称:按优先级顺序在多个目录中查找 + if source_dir is None: + for base_dir in skills_source_dirs: + candidate_dir = base_dir / skill + if 
candidate_dir.exists(): + source_dir = candidate_dir + logger.info(f" Found skill '{skill}' in {base_dir}") + break if source_dir is None: logger.warning(f" Skill directory '{skill}' not found in any source directory: {[str(d) for d in skills_source_dirs]}") continue - target_dir = skills_target_dir / os.path.basename(skill) - try: shutil.copytree(source_dir, target_dir) logger.info(f" Copied: {source_dir} -> {target_dir}")