From 432fb214d4788990552fef892849a8ef1ad0b5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 30 Oct 2025 21:50:43 +0800 Subject: [PATCH] support dataset_ids --- fastapi_app.py | 26 ++++++++++++++------------ utils/file_loaded_agent_manager.py | 4 ++-- utils/prompt_loader.py | 20 ++++++++++++-------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/fastapi_app.py b/fastapi_app.py index 9d020ea..75b0422 100644 --- a/fastapi_app.py +++ b/fastapi_app.py @@ -419,23 +419,25 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] = Chat completions API similar to OpenAI, supports both streaming and non-streaming Args: - request: ChatRequest containing messages, model, dataset_ids (required list), required bot_id, system_prompt, mcp_settings, and files + request: ChatRequest containing messages, model, dataset_ids (optional list), required bot_id, system_prompt, mcp_settings, and files authorization: Authorization header containing API key (Bearer ) Returns: Union[ChatResponse, StreamingResponse]: Chat completion response or stream Notes: - - dataset_ids: 必需参数,必须是项目ID列表(单个项目也使用数组格式) + - dataset_ids: 可选参数,当提供时必须是项目ID列表(单个项目也使用数组格式) - bot_id: 必需参数,机器人ID,用于创建项目目录 - - 所有请求都会创建机器人项目目录:projects/robot/{bot_id}/ + - 只有当提供 dataset_ids 时才会创建机器人项目目录:projects/robot/{bot_id}/ - 支持多知识库合并,自动处理文件夹重名冲突 Required Parameters: - - dataset_ids: List[str] - 源知识库项目ID列表(单个项目也使用数组格式) - bot_id: str - 目标机器人项目ID + Optional Parameters: + - dataset_ids: List[str] - 源知识库项目ID列表(单个项目也使用数组格式) Example: + {"bot_id": "my-bot-001"} {"dataset_ids": ["project-123"], "bot_id": "my-bot-001"} {"dataset_ids": ["project-123", "project-456"], "bot_id": "my-bot-002"} """ @@ -449,19 +451,19 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] = else: api_key = authorization - # 获取dataset_ids(必需参数,必须是数组) - dataset_ids_list = request.dataset_ids - if not dataset_ids_list: - raise HTTPException(status_code=400, detail="dataset_ids 
is required and must be a non-empty list") - # 获取bot_id(必需参数) bot_id = request.bot_id if not bot_id: raise HTTPException(status_code=400, detail="bot_id is required") - # 创建机器人目录并合并数据 - from utils.multi_project_manager import create_robot_project - project_dir = create_robot_project(dataset_ids_list, bot_id) + # 获取dataset_ids(可选参数,当提供时必须是数组) + dataset_ids_list = request.dataset_ids + project_dir = None + + # 只有当提供了 dataset_ids 时才创建机器人目录并合并数据 + if dataset_ids_list and len(dataset_ids_list) > 0: + from utils.multi_project_manager import create_robot_project + project_dir = create_robot_project(dataset_ids_list, bot_id) # 收集额外参数作为 generate_cfg exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id'} diff --git a/utils/file_loaded_agent_manager.py b/utils/file_loaded_agent_manager.py index 1e12285..810f4fc 100644 --- a/utils/file_loaded_agent_manager.py +++ b/utils/file_loaded_agent_manager.py @@ -92,7 +92,7 @@ class FileLoadedAgentManager: async def get_or_create_agent(self, bot_id: str, - project_dir: str, + project_dir: Optional[str], model_name: str = "qwen3-next", api_key: Optional[str] = None, model_server: Optional[str] = None, @@ -105,7 +105,7 @@ class FileLoadedAgentManager: Args: bot_id: 项目的唯一标识符 - project_dir: 项目目录路径,用于读取system_prompt.md和mcp_settings.json + project_dir: 项目目录路径,用于读取system_prompt.md和mcp_settings.json,可以为None model_name: 模型名称 api_key: API 密钥 model_server: 模型服务器地址 diff --git a/utils/prompt_loader.py b/utils/prompt_loader.py index 9b2a062..f75dfb7 100644 --- a/utils/prompt_loader.py +++ b/utils/prompt_loader.py @@ -25,7 +25,7 @@ def load_system_prompt(project_dir: str, language: str = None, system_prompt: st 优先使用项目目录的system_prompt_catalog_agent.md,没有才使用默认的system_prompt_default.md Args: - project_dir: 项目目录路径 + project_dir: 项目目录路径,可以为None language: 语言代码,如 'zh', 'en', 'jp' 等(此参数将被忽略) system_prompt: 可选的系统提示词,优先级高于项目配置 robot_type: 机器人类型,取值 
AGENT/CATALOG_AGENT @@ -45,11 +45,13 @@ def load_system_prompt(project_dir: str, language: str = None, system_prompt: st system_prompt_default = None readme = "" - readme_path = os.path.join(project_dir, "README.md") - if os.path.exists(readme_path): - with open(readme_path, "r", encoding="utf-8") as f: - readme = f.read().strip() - system_prompt_default = system_prompt_default.replace("{readme}", str(readme)) + # 只有当 project_dir 不为 None 时才尝试读取 README.md + if project_dir is not None: + readme_path = os.path.join(project_dir, "README.md") + if os.path.exists(readme_path): + with open(readme_path, "r", encoding="utf-8") as f: + readme = f.read().strip() + system_prompt_default = system_prompt_default.replace("{readme}", str(readme)) return system_prompt_default.replace("{language}", language_display).replace("{extra_prompt}", system_prompt or "").replace('{bot_id}', bot_id) or "" else: @@ -154,8 +156,10 @@ def load_mcp_settings(project_dir: str, mcp_settings: list=None, bot_id: str="", merged_settings = [merged_settings] if merged_settings else [] # 计算 dataset_dir 用于替换 MCP 配置中的占位符 - dataset_dir = os.path.join(project_dir, "dataset") + # 只有当 project_dir 不为 None 时才计算 dataset_dir + dataset_dir = os.path.join(project_dir, "dataset") if project_dir is not None else None # 替换 MCP 配置中的 {dataset_dir} 占位符 - merged_settings = replace_mcp_placeholders(merged_settings, dataset_dir, bot_id) + if dataset_dir is not None: + merged_settings = replace_mcp_placeholders(merged_settings, dataset_dir, bot_id) return merged_settings