From 42a14088f84a7527c12558c9e94e0910baeff000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Wed, 22 Oct 2025 22:00:38 +0800 Subject: [PATCH] mcp dataset_dir placeholder --- fastapi_app.py | 8 +- mcp/mcp_settings.json | 6 +- mcp/tools/multi_keyword_search_tools.json | 6 +- prompt/system_prompt_default.md | 3 +- utils/api_models.py | 2 + utils/file_loaded_agent_manager.py | 72 +++--------- utils/prompt_loader.py | 132 +++++++++++++++++++--- 7 files changed, 146 insertions(+), 83 deletions(-) diff --git a/fastapi_app.py b/fastapi_app.py index a94fab2..1be3334 100644 --- a/fastapi_app.py +++ b/fastapi_app.py @@ -427,10 +427,10 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] = # 使用unique_id获取项目目录 project_dir = os.path.join("projects", unique_id) if not os.path.exists(project_dir): - raise HTTPException(status_code=400, detail=f"Project directory not found for unique_id: {unique_id}") + project_dir = "" # 收集额外参数作为 generate_cfg - exclude_fields = {'messages', 'model', 'model_server', 'unique_id', 'language', 'tool_response', 'stream'} + exclude_fields = {'messages', 'model', 'model_server', 'unique_id', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream'} generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields} # 从全局管理器获取或创建助手实例(配置读取逻辑已在agent_manager内部处理) @@ -441,7 +441,9 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] = api_key=api_key, model_server=request.model_server, generate_cfg=generate_cfg, - language=request.language + language=request.language, + system_prompt=request.system_prompt, + mcp_settings=request.mcp_settings ) # 构建包含项目信息的消息上下文 messages = [] diff --git a/mcp/mcp_settings.json b/mcp/mcp_settings.json index 4586c9a..30b39af 100644 --- a/mcp/mcp_settings.json +++ b/mcp/mcp_settings.json @@ -4,13 +4,15 @@ "semantic_search": { "command": "python", "args": [ - "./mcp/semantic_search_server.py" + "./mcp/semantic_search_server.py", + "{dataset_dir}" ] }, "multi_keyword": { "command": "python", "args": [ - "./mcp/multi_keyword_search_server.py" + "./mcp/multi_keyword_search_server.py", + "{dataset_dir}" ] } } diff --git a/mcp/tools/multi_keyword_search_tools.json b/mcp/tools/multi_keyword_search_tools.json index 4af3234..da44941 100644 --- a/mcp/tools/multi_keyword_search_tools.json +++ b/mcp/tools/multi_keyword_search_tools.json @@ -27,7 +27,7 @@ "items": { "type": "string" }, - "description": "List of file paths to search" + "description": "List of file paths or filenames to search. If relative path is provided, it will be combined with the dataset directory." }, "limit": { "type": "integer", @@ -61,7 +61,7 @@ "items": { "type": "string" }, - "description": "List of file paths to search" + "description": "List of file paths or filenames to search. If relative path is provided, it will be combined with the dataset directory." }, "context_lines": { "type": "integer", @@ -102,7 +102,7 @@ "items": { "type": "string" }, - "description": "List of file paths to search" + "description": "List of file paths or filenames to search. If relative path is provided, it will be combined with the dataset directory." }, "case_sensitive": { "type": "boolean", diff --git a/prompt/system_prompt_default.md b/prompt/system_prompt_default.md index 0602cf7..a1e6ae6 100644 --- a/prompt/system_prompt_default.md +++ b/prompt/system_prompt_default.md @@ -20,7 +20,8 @@ - 通过`semantic_search-semantic_search`工具可以实现语义检索,可以为关键词扩展提供赶上下文支持。 ### 目录结构 -#### 项目目录:{dataset_dir} +项目相关信息请通过 MCP 工具参数获取数据集目录信息。 + {readme} diff --git a/utils/api_models.py b/utils/api_models.py index eb50a8a..4a81a95 100644 --- a/utils/api_models.py +++ b/utils/api_models.py @@ -47,6 +47,8 @@ class ChatRequest(BaseModel): stream: Optional[bool] = False language: Optional[str] = "ja" tool_response: Optional[bool] = False + system_prompt: Optional[str] = None + mcp_settings: Optional[List[Dict]] = None class FileProcessRequest(BaseModel): diff --git a/utils/file_loaded_agent_manager.py b/utils/file_loaded_agent_manager.py index 1cb3781..0fd68b1 100644 --- a/utils/file_loaded_agent_manager.py +++ b/utils/file_loaded_agent_manager.py @@ -16,12 +16,14 @@ import hashlib import time +import json from typing import Dict, List, Optional from qwen_agent.agents import Assistant from qwen_agent.log import logger from modified_assistant import init_modified_agent_service_with_files, update_agent_llm +from .prompt_loader import load_system_prompt, load_mcp_settings class FileLoadedAgentManager: @@ -31,7 +33,7 @@ class FileLoadedAgentManager: """ def __init__(self, max_cached_agents: int = 20): - self.agents: Dict[str, Assistant] = {} # {unique_id: assistant_instance} + self.agents: Dict[str, Assistant] = {} # {cache_key: assistant_instance} self.unique_ids: Dict[str, str] = {} # {cache_key: unique_id} self.access_times: Dict[str, float] = {} # LRU 访问时间管理 self.creation_times: Dict[str, float] = {} # 创建时间记录 @@ -77,79 +79,31 @@ class FileLoadedAgentManager: api_key: Optional[str] = None, model_server: Optional[str] = None, generate_cfg: Optional[Dict] = None, - language: Optional[str] = None) -> Assistant: + language: Optional[str] = None, + system_prompt: Optional[str] = None, + mcp_settings: Optional[List[Dict]] = None) -> Assistant: """获取或创建文件预加载的助手实例 Args: unique_id: 项目的唯一标识符 - files: 需要预加载的文件路径列表 project_dir: 项目目录路径,用于读取system_prompt.md和mcp_settings.json model_name: 模型名称 api_key: API 密钥 model_server: 模型服务器地址 generate_cfg: 生成配置 language: 语言代码,用于选择对应的系统提示词 + system_prompt: 可选的系统提示词,优先级高于项目配置 + mcp_settings: 可选的MCP设置,优先级高于项目配置 Returns: Assistant: 配置好的助手实例 """ import os - import json - - # 使用prompt_loader读取system_prompt模板 - from .prompt_loader import load_system_prompt - system_prompt_template = load_system_prompt(project_dir, language) - - readme = "" - readme_path = os.path.join(project_dir, "README.md") - if os.path.exists(readme_path): - with open(readme_path, "r", encoding="utf-8") as f: - readme = f.read().strip() - dataset_dir = os.path.join(project_dir, "dataset") - - # 检查dataset_dir下是否只有一个default文件夹 - if os.path.exists(dataset_dir): - items = os.listdir(dataset_dir) - if len(items) == 1 and items[0] == "default": - dataset_dir = os.path.join(dataset_dir, "default") - # 获取语言显示名称 - language_display_map = { - 'zh': '中文', - 'en': 'English', - 'ja': '日本語', - 'jp': '日本語' - } - language_display = language_display_map.get(language, language if language else 'English') - - final_system_prompt = system_prompt_template.replace("{dataset_dir}", str(dataset_dir)).replace("{readme}", str(readme)).replace("{language}", language_display) - logger.info(f"Loaded global system_prompt for unique_id: {unique_id}") - if not final_system_prompt: - logger.info(f"No system_prompt found for unique_id: {unique_id}") - - # 读取mcp_settings:优先从项目目录读取,然后降级到全局配置 - final_mcp_settings = None - - # 尝试从项目目录读取 - mcp_settings_file = os.path.join(project_dir, "mcp_settings.json") - if os.path.exists(mcp_settings_file): - with open(mcp_settings_file, 'r', encoding='utf-8') as f: - final_mcp_settings = json.load(f) - logger.info(f"Loaded mcp_settings from project directory for unique_id: {unique_id}") - else: - # 降级到全局配置 - mcp_settings_path = "./mcp/mcp_settings.json" - if os.path.exists(mcp_settings_path): - with open(mcp_settings_path, "r", encoding="utf-8") as f: - final_mcp_settings = json.load(f) - logger.info(f"Loaded global mcp_settings for unique_id: {unique_id}") - else: - final_mcp_settings = [] - logger.info(f"No mcp_settings found for unique_id: {unique_id}") - - if final_mcp_settings is None: - final_mcp_settings = [] - + # 实现参数优先级逻辑:传入参数 > 项目配置 > 默认配置 + final_system_prompt = load_system_prompt(project_dir, language, system_prompt) + final_mcp_settings = load_mcp_settings(project_dir, mcp_settings) + cache_key = self._get_cache_key(unique_id) # 检查是否已存在该助手实例 @@ -158,7 +112,7 @@ class FileLoadedAgentManager: agent = self.agents[cache_key] # 动态更新 LLM 配置和系统设置(如果参数有变化) - update_agent_llm(agent, model_name, api_key, model_server, generate_cfg, final_system_prompt, final_mcp_settings) + update_agent_llm(agent, model_name, api_key, model_server, generate_cfg, final_system_prompt, mcp_settings) logger.info(f"复用现有的助手实例缓存: {cache_key} (unique_id: {unique_id}") return agent diff --git a/utils/prompt_loader.py b/utils/prompt_loader.py index 74de70b..9f0452d 100644 --- a/utils/prompt_loader.py +++ b/utils/prompt_loader.py @@ -1,11 +1,15 @@ #!/usr/bin/env python3 """ -System prompt loader utilities +System prompt and MCP settings loader utilities """ import os +import json +from typing import List, Dict, Optional + + +def load_system_prompt(project_dir: str, language: str = None, system_prompt: str=None) -> str: -def load_system_prompt(project_dir: str, language: str = None) -> str: """ 优先使用项目目录的system_prompt,没有才使用默认的system_prompt_default.md @@ -16,18 +20,18 @@ def load_system_prompt(project_dir: str, language: str = None) -> str: Returns: str: 加载到的系统提示词内容,如果都未找到则返回空字符串 """ - system_prompt = None - + # 1. 优先读取项目目录中的system_prompt - system_prompt_file = os.path.join(project_dir, "system_prompt.md") - if os.path.exists(system_prompt_file): - try: - with open(system_prompt_file, 'r', encoding='utf-8') as f: - system_prompt = f.read() - print(f"Using project-specific system prompt") - except Exception as e: - print(f"Failed to load project system prompt: {str(e)}") - system_prompt = None + if not system_prompt: + system_prompt_file = os.path.join(project_dir, "system_prompt.md") + if os.path.exists(system_prompt_file): + try: + with open(system_prompt_file, 'r', encoding='utf-8') as f: + system_prompt = f.read() + print(f"Using project-specific system prompt") + except Exception as e: + print(f"Failed to load project system prompt: {str(e)}") + system_prompt = None # 2. 如果项目目录没有,使用默认提示词 if not system_prompt: @@ -40,7 +44,21 @@ def load_system_prompt(project_dir: str, language: str = None) -> str: print(f"Failed to load default system prompt: {str(e)}") system_prompt = None - return system_prompt or "" + readme = "" + readme_path = os.path.join(project_dir, "README.md") + if os.path.exists(readme_path): + with open(readme_path, "r", encoding="utf-8") as f: + readme = f.read().strip() + + # 获取语言显示名称 + language_display_map = { + 'zh': '中文', + 'en': 'English', + 'ja': '日本語', + 'jp': '日本語' + } + language_display = language_display_map.get(language, language if language else 'English') + return system_prompt.replace("{readme}", str(readme)).replace("{language}", language_display) or "" def get_available_prompt_languages() -> list: @@ -63,6 +81,90 @@ def get_available_prompt_languages() -> list: return available_languages +def replace_mcp_placeholders(mcp_settings: List[Dict], dataset_dir: str) -> List[Dict]: + """ + 替换 MCP 配置中的占位符 + """ + if not mcp_settings or not isinstance(mcp_settings, list): + return mcp_settings + + def replace_placeholders_in_obj(obj): + """递归替换对象中的占位符""" + if isinstance(obj, dict): + for key, value in obj.items(): + if key == 'args' and isinstance(value, list): + # 特别处理 args 列表 + obj[key] = [item.replace('{dataset_dir}', dataset_dir) if isinstance(item, str) else item + for item in value] + elif isinstance(value, (dict, list)): + obj[key] = replace_placeholders_in_obj(value) + elif isinstance(value, str): + obj[key] = value.replace('{dataset_dir}', dataset_dir) + elif isinstance(obj, list): + return [replace_placeholders_in_obj(item) if isinstance(item, (dict, list)) else + item.replace('{dataset_dir}', dataset_dir) if isinstance(item, str) else item + for item in obj] + return obj + + return replace_placeholders_in_obj(mcp_settings) + +def load_mcp_settings(project_dir: str, mcp_settings: list=None) -> List[Dict]: + + """ + 优先使用项目目录的mcp_settings.json,没有才使用默认的mcp/mcp_settings.json + + Args: + project_dir: 项目目录路径 + + Returns: + List[Dict]: 加载到的MCP设置列表,如果都未找到则返回空列表 + + Note: + 支持在 mcp_settings.json 的 args 中使用 {dataset_dir} 占位符, + 会在 init_modified_agent_service_with_files 中被替换为实际的路径。 + """ + # 1. 优先读取项目目录中的mcp_settings.json + if mcp_settings is None: + mcp_settings_file = os.path.join(project_dir, "mcp_settings.json") + if os.path.exists(mcp_settings_file): + try: + with open(mcp_settings_file, 'r', encoding='utf-8') as f: + mcp_settings = json.load(f) + print(f"Using project-specific mcp_settings") + except Exception as e: + print(f"Failed to load project mcp_settings: {str(e)}") + mcp_settings = None + + # 2. 如果项目目录没有,使用默认MCP设置 + if mcp_settings is None: + try: + default_mcp_file = os.path.join("mcp", "mcp_settings.json") + if os.path.exists(default_mcp_file): + with open(default_mcp_file, 'r', encoding='utf-8') as f: + mcp_settings = json.load(f) + print(f"Using default mcp_settings from mcp folder") + else: + mcp_settings = [] + print(f"No default mcp_settings found, using empty list") + except Exception as e: + print(f"Failed to load default mcp_settings: {str(e)}") + mcp_settings = [] + + # 确保返回的是列表格式 + if mcp_settings is None: + mcp_settings = [] + elif not isinstance(mcp_settings, list): + print(f"Warning: mcp_settings is not a list, converting to list format") + mcp_settings = [mcp_settings] if mcp_settings else [] + + # 计算 dataset_dir 用于替换 MCP 配置中的占位符 + dataset_dir = os.path.join(project_dir, "dataset") + # 替换 MCP 配置中的 {dataset_dir} 占位符 + mcp_settings = replace_mcp_placeholders(mcp_settings, dataset_dir) + print(mcp_settings) + return mcp_settings + + def is_language_available(language: str) -> bool: """ 检查指定语言的提示词是否可用 @@ -74,4 +176,4 @@ def is_language_available(language: str) -> bool: bool: 如果可用返回True,否则返回False """ prompt_file = os.path.join("prompt", f"system_prompt_{language}.md") - return os.path.exists(prompt_file) \ No newline at end of file + return os.path.exists(prompt_file)