mcp dataset_dir placeholder

This commit is contained in:
朱潮 2025-10-22 22:00:38 +08:00
parent 85de4bf6da
commit 42a14088f8
7 changed files with 146 additions and 83 deletions

View File

@ -427,10 +427,10 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
# 使用unique_id获取项目目录 # 使用unique_id获取项目目录
project_dir = os.path.join("projects", unique_id) project_dir = os.path.join("projects", unique_id)
if not os.path.exists(project_dir): if not os.path.exists(project_dir):
raise HTTPException(status_code=400, detail=f"Project directory not found for unique_id: {unique_id}") project_dir = ""
# 收集额外参数作为 generate_cfg # 收集额外参数作为 generate_cfg
exclude_fields = {'messages', 'model', 'model_server', 'unique_id', 'language', 'tool_response', 'stream'} exclude_fields = {'messages', 'model', 'model_server', 'unique_id', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream'}
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields} generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
# 从全局管理器获取或创建助手实例配置读取逻辑已在agent_manager内部处理 # 从全局管理器获取或创建助手实例配置读取逻辑已在agent_manager内部处理
@ -441,7 +441,9 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
api_key=api_key, api_key=api_key,
model_server=request.model_server, model_server=request.model_server,
generate_cfg=generate_cfg, generate_cfg=generate_cfg,
language=request.language language=request.language,
system_prompt=request.system_prompt,
mcp_settings=request.mcp_settings
) )
# 构建包含项目信息的消息上下文 # 构建包含项目信息的消息上下文
messages = [] messages = []

View File

@ -4,13 +4,15 @@
"semantic_search": { "semantic_search": {
"command": "python", "command": "python",
"args": [ "args": [
"./mcp/semantic_search_server.py" "./mcp/semantic_search_server.py",
"{dataset_dir}"
] ]
}, },
"multi_keyword": { "multi_keyword": {
"command": "python", "command": "python",
"args": [ "args": [
"./mcp/multi_keyword_search_server.py" "./mcp/multi_keyword_search_server.py",
"{dataset_dir}"
] ]
} }
} }

View File

@ -27,7 +27,7 @@
"items": { "items": {
"type": "string" "type": "string"
}, },
"description": "List of file paths to search" "description": "List of file paths or filenames to search. If relative path is provided, it will be combined with the dataset directory."
}, },
"limit": { "limit": {
"type": "integer", "type": "integer",
@ -61,7 +61,7 @@
"items": { "items": {
"type": "string" "type": "string"
}, },
"description": "List of file paths to search" "description": "List of file paths or filenames to search. If relative path is provided, it will be combined with the dataset directory."
}, },
"context_lines": { "context_lines": {
"type": "integer", "type": "integer",
@ -102,7 +102,7 @@
"items": { "items": {
"type": "string" "type": "string"
}, },
"description": "List of file paths to search" "description": "List of file paths or filenames to search. If relative path is provided, it will be combined with the dataset directory."
}, },
"case_sensitive": { "case_sensitive": {
"type": "boolean", "type": "boolean",

View File

@ -20,7 +20,8 @@
- 通过`semantic_search-semantic_search`工具可以实现语义检索,可以为关键词扩展提供上下文支持。 - 通过`semantic_search-semantic_search`工具可以实现语义检索,可以为关键词扩展提供上下文支持。
### 目录结构 ### 目录结构
#### 项目目录:{dataset_dir} 项目相关信息请通过 MCP 工具参数获取数据集目录信息。
{readme} {readme}

View File

@ -47,6 +47,8 @@ class ChatRequest(BaseModel):
stream: Optional[bool] = False stream: Optional[bool] = False
language: Optional[str] = "ja" language: Optional[str] = "ja"
tool_response: Optional[bool] = False tool_response: Optional[bool] = False
system_prompt: Optional[str] = None
mcp_settings: Optional[List[Dict]] = None
class FileProcessRequest(BaseModel): class FileProcessRequest(BaseModel):

View File

@ -16,12 +16,14 @@
import hashlib import hashlib
import time import time
import json
from typing import Dict, List, Optional from typing import Dict, List, Optional
from qwen_agent.agents import Assistant from qwen_agent.agents import Assistant
from qwen_agent.log import logger from qwen_agent.log import logger
from modified_assistant import init_modified_agent_service_with_files, update_agent_llm from modified_assistant import init_modified_agent_service_with_files, update_agent_llm
from .prompt_loader import load_system_prompt, load_mcp_settings
class FileLoadedAgentManager: class FileLoadedAgentManager:
@ -31,7 +33,7 @@ class FileLoadedAgentManager:
""" """
def __init__(self, max_cached_agents: int = 20): def __init__(self, max_cached_agents: int = 20):
self.agents: Dict[str, Assistant] = {} # {unique_id: assistant_instance} self.agents: Dict[str, Assistant] = {} # {cache_key: assistant_instance}
self.unique_ids: Dict[str, str] = {} # {cache_key: unique_id} self.unique_ids: Dict[str, str] = {} # {cache_key: unique_id}
self.access_times: Dict[str, float] = {} # LRU 访问时间管理 self.access_times: Dict[str, float] = {} # LRU 访问时间管理
self.creation_times: Dict[str, float] = {} # 创建时间记录 self.creation_times: Dict[str, float] = {} # 创建时间记录
@ -77,78 +79,30 @@ class FileLoadedAgentManager:
api_key: Optional[str] = None, api_key: Optional[str] = None,
model_server: Optional[str] = None, model_server: Optional[str] = None,
generate_cfg: Optional[Dict] = None, generate_cfg: Optional[Dict] = None,
language: Optional[str] = None) -> Assistant: language: Optional[str] = None,
system_prompt: Optional[str] = None,
mcp_settings: Optional[List[Dict]] = None) -> Assistant:
"""获取或创建文件预加载的助手实例 """获取或创建文件预加载的助手实例
Args: Args:
unique_id: 项目的唯一标识符 unique_id: 项目的唯一标识符
files: 需要预加载的文件路径列表
project_dir: 项目目录路径用于读取system_prompt.md和mcp_settings.json project_dir: 项目目录路径用于读取system_prompt.md和mcp_settings.json
model_name: 模型名称 model_name: 模型名称
api_key: API 密钥 api_key: API 密钥
model_server: 模型服务器地址 model_server: 模型服务器地址
generate_cfg: 生成配置 generate_cfg: 生成配置
language: 语言代码用于选择对应的系统提示词 language: 语言代码用于选择对应的系统提示词
system_prompt: 可选的系统提示词优先级高于项目配置
mcp_settings: 可选的MCP设置优先级高于项目配置
Returns: Returns:
Assistant: 配置好的助手实例 Assistant: 配置好的助手实例
""" """
import os import os
import json
# 使用prompt_loader读取system_prompt模板 # 实现参数优先级逻辑:传入参数 > 项目配置 > 默认配置
from .prompt_loader import load_system_prompt final_system_prompt = load_system_prompt(project_dir, language, system_prompt)
system_prompt_template = load_system_prompt(project_dir, language) final_mcp_settings = load_mcp_settings(project_dir, mcp_settings)
readme = ""
readme_path = os.path.join(project_dir, "README.md")
if os.path.exists(readme_path):
with open(readme_path, "r", encoding="utf-8") as f:
readme = f.read().strip()
dataset_dir = os.path.join(project_dir, "dataset")
# 检查dataset_dir下是否只有一个default文件夹
if os.path.exists(dataset_dir):
items = os.listdir(dataset_dir)
if len(items) == 1 and items[0] == "default":
dataset_dir = os.path.join(dataset_dir, "default")
# 获取语言显示名称
language_display_map = {
'zh': '中文',
'en': 'English',
'ja': '日本語',
'jp': '日本語'
}
language_display = language_display_map.get(language, language if language else 'English')
final_system_prompt = system_prompt_template.replace("{dataset_dir}", str(dataset_dir)).replace("{readme}", str(readme)).replace("{language}", language_display)
logger.info(f"Loaded global system_prompt for unique_id: {unique_id}")
if not final_system_prompt:
logger.info(f"No system_prompt found for unique_id: {unique_id}")
# 读取mcp_settings优先从项目目录读取然后降级到全局配置
final_mcp_settings = None
# 尝试从项目目录读取
mcp_settings_file = os.path.join(project_dir, "mcp_settings.json")
if os.path.exists(mcp_settings_file):
with open(mcp_settings_file, 'r', encoding='utf-8') as f:
final_mcp_settings = json.load(f)
logger.info(f"Loaded mcp_settings from project directory for unique_id: {unique_id}")
else:
# 降级到全局配置
mcp_settings_path = "./mcp/mcp_settings.json"
if os.path.exists(mcp_settings_path):
with open(mcp_settings_path, "r", encoding="utf-8") as f:
final_mcp_settings = json.load(f)
logger.info(f"Loaded global mcp_settings for unique_id: {unique_id}")
else:
final_mcp_settings = []
logger.info(f"No mcp_settings found for unique_id: {unique_id}")
if final_mcp_settings is None:
final_mcp_settings = []
cache_key = self._get_cache_key(unique_id) cache_key = self._get_cache_key(unique_id)
@ -158,7 +112,7 @@ class FileLoadedAgentManager:
agent = self.agents[cache_key] agent = self.agents[cache_key]
# 动态更新 LLM 配置和系统设置(如果参数有变化) # 动态更新 LLM 配置和系统设置(如果参数有变化)
update_agent_llm(agent, model_name, api_key, model_server, generate_cfg, final_system_prompt, final_mcp_settings) update_agent_llm(agent, model_name, api_key, model_server, generate_cfg, final_system_prompt, mcp_settings)
logger.info(f"复用现有的助手实例缓存: {cache_key} (unique_id: {unique_id}") logger.info(f"复用现有的助手实例缓存: {cache_key} (unique_id: {unique_id}")
return agent return agent

View File

@ -1,11 +1,15 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
System prompt loader utilities System prompt and MCP settings loader utilities
""" """
import os import os
import json
from typing import List, Dict, Optional
def load_system_prompt(project_dir: str, language: str = None, system_prompt: str=None) -> str:
def load_system_prompt(project_dir: str, language: str = None) -> str:
""" """
优先使用项目目录的system_prompt没有才使用默认的system_prompt_default.md 优先使用项目目录的system_prompt没有才使用默认的system_prompt_default.md
@ -16,9 +20,9 @@ def load_system_prompt(project_dir: str, language: str = None) -> str:
Returns: Returns:
str: 加载到的系统提示词内容如果都未找到则返回空字符串 str: 加载到的系统提示词内容如果都未找到则返回空字符串
""" """
system_prompt = None
# 1. 优先读取项目目录中的system_prompt # 1. 优先读取项目目录中的system_prompt
if not system_prompt:
system_prompt_file = os.path.join(project_dir, "system_prompt.md") system_prompt_file = os.path.join(project_dir, "system_prompt.md")
if os.path.exists(system_prompt_file): if os.path.exists(system_prompt_file):
try: try:
@ -40,7 +44,21 @@ def load_system_prompt(project_dir: str, language: str = None) -> str:
print(f"Failed to load default system prompt: {str(e)}") print(f"Failed to load default system prompt: {str(e)}")
system_prompt = None system_prompt = None
return system_prompt or "" readme = ""
readme_path = os.path.join(project_dir, "README.md")
if os.path.exists(readme_path):
with open(readme_path, "r", encoding="utf-8") as f:
readme = f.read().strip()
# 获取语言显示名称
language_display_map = {
'zh': '中文',
'en': 'English',
'ja': '日本語',
'jp': '日本語'
}
language_display = language_display_map.get(language, language if language else 'English')
return system_prompt.replace("{readme}", str(readme)).replace("{language}", language_display) or ""
def get_available_prompt_languages() -> list: def get_available_prompt_languages() -> list:
@ -63,6 +81,90 @@ def get_available_prompt_languages() -> list:
return available_languages return available_languages
def replace_mcp_placeholders(mcp_settings: List[Dict], dataset_dir: str) -> List[Dict]:
    """Expand the ``{dataset_dir}`` placeholder throughout an MCP settings list.

    Every string reachable through nested dicts and lists is rewritten,
    including strings nested inside containers that sit in an ``args`` list.

    Args:
        mcp_settings: MCP configuration entries. Dicts found inside the
            structure are updated in place; lists are rebuilt.
        dataset_dir: Text substituted for each ``{dataset_dir}`` occurrence.

    Returns:
        A new top-level list with placeholders expanded. A falsy or non-list
        ``mcp_settings`` is returned unchanged.
    """
    if not mcp_settings or not isinstance(mcp_settings, list):
        return mcp_settings

    placeholder = '{dataset_dir}'

    def replace_placeholders_in_obj(obj):
        """Recursively expand the placeholder in strings, lists and dicts."""
        if isinstance(obj, str):
            return obj.replace(placeholder, dataset_dir)
        if isinstance(obj, list):
            return [replace_placeholders_in_obj(item) for item in obj]
        if isinstance(obj, dict):
            # Only existing keys are reassigned, so the dict's size is stable
            # and updating it while iterating is safe.
            for key, value in obj.items():
                obj[key] = replace_placeholders_in_obj(value)
        # Numbers, booleans, None, tuples, ... pass through untouched.
        return obj

    return replace_placeholders_in_obj(mcp_settings)
def load_mcp_settings(project_dir: str, mcp_settings: Optional[List[Dict]] = None) -> List[Dict]:
    """Resolve the MCP settings for a project and expand ``{dataset_dir}``.

    Resolution order:
        1. ``mcp_settings`` as passed in, when it is not ``None``.
        2. ``<project_dir>/mcp_settings.json``, when it exists and parses.
        3. ``mcp/mcp_settings.json`` (relative to the working directory).
        4. An empty list.

    Args:
        project_dir: Project directory path; also the parent of the
            ``dataset`` folder used for placeholder substitution.
        mcp_settings: Optional explicit settings that take precedence over
            any file. Dicts inside it are updated in place by
            ``replace_mcp_placeholders``.

    Returns:
        The resolved settings as a list (a non-list value is wrapped in a
        one-element list, or dropped if falsy), with every ``{dataset_dir}``
        placeholder replaced by ``<project_dir>/dataset``.
    """
    # 1. No explicit settings: prefer the project-specific file.
    if mcp_settings is None:
        mcp_settings_file = os.path.join(project_dir, "mcp_settings.json")
        if os.path.exists(mcp_settings_file):
            try:
                with open(mcp_settings_file, 'r', encoding='utf-8') as f:
                    mcp_settings = json.load(f)
                print("Using project-specific mcp_settings")
            # OSError: unreadable file; ValueError: invalid JSON or encoding.
            except (OSError, ValueError) as e:
                print(f"Failed to load project mcp_settings: {str(e)}")
                mcp_settings = None

    # 2. Still nothing: fall back to the default settings shipped in ./mcp.
    if mcp_settings is None:
        try:
            default_mcp_file = os.path.join("mcp", "mcp_settings.json")
            if os.path.exists(default_mcp_file):
                with open(default_mcp_file, 'r', encoding='utf-8') as f:
                    mcp_settings = json.load(f)
                print("Using default mcp_settings from mcp folder")
            else:
                mcp_settings = []
                print("No default mcp_settings found, using empty list")
        except (OSError, ValueError) as e:
            print(f"Failed to load default mcp_settings: {str(e)}")
            mcp_settings = []

    # Normalize to a list (a JSON file may hold ``null`` or a single object).
    if mcp_settings is None:
        mcp_settings = []
    elif not isinstance(mcp_settings, list):
        print("Warning: mcp_settings is not a list, converting to list format")
        mcp_settings = [mcp_settings] if mcp_settings else []

    # Expand {dataset_dir} with this project's dataset folder. The resolved
    # settings are deliberately not printed: they describe server commands
    # and may carry sensitive values.
    dataset_dir = os.path.join(project_dir, "dataset")
    return replace_mcp_placeholders(mcp_settings, dataset_dir)
def is_language_available(language: str) -> bool: def is_language_available(language: str) -> bool:
""" """
检查指定语言的提示词是否可用 检查指定语言的提示词是否可用