From 40aa71b9668361c69f36e122e4933b2d35da507d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Tue, 4 Nov 2025 23:16:21 +0800 Subject: [PATCH] add v2 api --- README.md | 81 ++++++++ api_v2_example.md | 177 +++++++++++++++++ fastapi_app.py | 384 ++++++++++++++++++++++++++++--------- mcp/rag_retrieve_server.py | 9 +- utils/api_models.py | 8 + 5 files changed, 571 insertions(+), 88 deletions(-) create mode 100644 api_v2_example.md diff --git a/README.md b/README.md index 0f9a9a2..e69b70a 100644 --- a/README.md +++ b/README.md @@ -217,6 +217,83 @@ def submit_and_monitor_task(): submit_and_monitor_task() ``` +### 4. 项目目录树接口 + +#### 获取完整目录树 + +**端点**: `GET /api/v1/projects/tree` + +```bash +# 获取完整目录树 +curl "http://localhost:8001/api/v1/projects/tree" + +# 只显示目录结构(不包含文件) +curl "http://localhost:8001/api/v1/projects/tree?include_files=false" + +# 只显示data目录 +curl "http://localhost:8001/api/v1/projects/tree?filter_type=data" +``` + +**响应示例**: +```json +{ + "success": true, + "message": "目录树获取成功", + "tree": { + "name": "projects", + "path": "", + "type": "directory", + "children": [ + { + "name": "data", + "path": "data", + "type": "directory", + "children": [ + { + "name": "1624be71-5432-40bf-9758-f4aecffd4e9c", + "path": "data/1624be71-5432-40bf-9758-f4aecffd4e9c", + "type": "directory", + "children": [...] 
+ } + ] + } + ], + "size": 0, + "modified_time": 1234567890 + }, + "stats": { + "total_directories": 15, + "total_files": 32, + "total_size": 1048576 + } +} +``` + +#### 获取子目录树结构 + +**端点**: `GET /api/v1/projects/subtree/{sub_path:path}` + +```bash +# 获取特定项目的目录结构 +curl "http://localhost:8001/api/v1/projects/subtree/data/1624be71-5432-40bf-9758-f4aecffd4e9c" + +# 只显示目录层级 +curl "http://localhost:8001/api/v1/projects/subtree/data/1624be71-5432-40bf-9758-f4aecffd4e9c?include_files=false" +``` + +**参数说明**: +- `sub_path`: 子目录路径,如 'data/1624be71-5432-40bf-9758-f4aecffd4e9c' +- `include_files`: 是否包含文件详情(默认true) +- `max_depth`: 最大深度限制(默认10) + +**功能特性**: +- 递归构建完整的目录树结构 +- 包含文件大小和修改时间信息 +- 支持过滤文件类型和目录层级 +- 提供统计信息(目录数、文件数、总大小) +- 安全的错误处理机制 + + --- ## 🗃️ 数据包结构 @@ -308,6 +385,10 @@ curl -X POST "http://localhost:8001/api/v1/tasks/cleanup?older_than_days=7" - `DELETE /api/v1/task/{task_id}` - 删除任务记录 - `POST /api/v1/project/cleanup` - 清理项目数据 +### 项目目录树接口 +- `GET /api/v1/projects/tree` - 获取projects文件夹完整目录树结构 +- `GET /api/v1/projects/subtree/{sub_path:path}` - 获取指定子目录的树结构 + ### 系统管理接口 - `GET /api/health` - 健康检查 - `GET /system/status` - 系统状态 diff --git a/api_v2_example.md b/api_v2_example.md new file mode 100644 index 0000000..7eeefe1 --- /dev/null +++ b/api_v2_example.md @@ -0,0 +1,177 @@ +# API v2 Usage Example + +## Overview +API v2 提供了简化的聊天完成接口,与 v1 接口共享核心逻辑,确保功能一致性和代码维护性。 + +## Endpoint +`POST /api/v2/chat/completions` + +## Description +This is a simplified version of the chat completions API that only requires essential parameters. All other configuration parameters are automatically fetched from the backend bot configuration API. + +## Code Architecture (重构后的代码结构) + +### 1. 公共函数提取 +- **`process_messages()`**: 处理消息列表,包括[ANSWER]分割和语言指令添加 +- **`create_agent_and_generate_response()`**: 创建agent并生成响应的公共逻辑 +- **`create_project_directory()`**: 创建项目目录的公共逻辑 +- **`extract_api_key_from_auth()`**: 从Authorization header中提取API key + +### 2. 
不同的鉴权方式 +- **v1接口**: Authorization header中的API key直接用作模型API密钥 + ```bash + Authorization: Bearer your-model-api-key + ``` + +- **v2接口**: 需要有效的MD5哈希令牌进行认证 + ```bash + # 生成鉴权token + token=$(echo -n "master:your-bot-id" | md5sum | cut -d' ' -f1) + Authorization: Bearer ${token} + ``` + +### 3. 接口设计 +- **`/api/v1/chat/completions`**: 处理 `ChatRequest`,直接使用请求中的所有参数 +- **`/api/v2/chat/completions`**: 处理 `ChatRequestV2`,从后端获取配置参数 + +### 4. 设计优势 +- ✅ 最大化代码复用,减少重复逻辑 +- ✅ 保持不同的鉴权方式,满足不同需求 +- ✅ 清晰的函数分离,易于维护和测试 +- ✅ 统一的错误处理和响应格式 +- ✅ 异步HTTP请求,提高并发性能 +- ✅ 使用aiohttp替代requests,避免阻塞 + +## Request Format + +### Required Parameters +- `bot_id`: string - The target robot ID +- `messages`: array of message objects - Conversation messages + +### Optional Parameters +- `stream`: boolean - Whether to stream responses (default: false) +- `tool_response`: boolean - Whether to include tool responses (default: false) +- `language`: string - Response language (default: "ja") + +### Message Object Format +```json +{ + "role": "user" | "assistant" | "system", + "content": "string" +} +``` + +## Example Request + +### Basic Request +```bash +curl -X POST "http://localhost:8001/api/v2/chat/completions" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-api-key" \ + -d '{ + "bot_id": "1624be71-5432-40bf-9758-f4aecffd4e9c", + "messages": [ + { + "role": "user", + "content": "Hello, how are you?" 
+ } + ], + "language": "en", + "stream": false + }' +``` + +### Streaming Request +```bash +curl -X POST "http://localhost:8001/api/v2/chat/completions" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-api-key" \ + -d '{ + "bot_id": "1624be71-5432-40bf-9758-f4aecffd4e9c", + "messages": [ + { + "role": "user", + "content": "Tell me about yourself" + } + ], + "language": "ja", + "stream": true + }' +``` + +## Backend Configuration + +The endpoint automatically fetches the following configuration from `{BACKEND_HOST}/v1/agent_bot_config/{bot_id}`: + +- `model`: Model name (e.g., "qwen/qwen3-next-80b-a3b-instruct") +- `model_server`: Model server URL +- `dataset_ids`: Array of dataset IDs for knowledge base +- `system_prompt`: System prompt for the agent +- `mcp_settings`: MCP configuration settings +- `robot_type`: Type of robot (e.g., "catalog_agent") +- `api_key`: API key for model server access + +## Authentication + +### v2 API Authentication (Required) + +The v2 endpoint requires a specific authentication token format: + +**Token Generation:** +```bash +# Method 1: Using environment variables (recommended) +export MASTERKEY="your-master-key" +export BOT_ID="1624be71-5432-40bf-9758-f4aecffd4e9c" +token=$(echo -n "${MASTERKEY}:${BOT_ID}" | md5sum | cut -d' ' -f1) + +# Method 2: Direct calculation +token=$(echo -n "master:1624be71-5432-40bf-9758-f4aecffd4e9c" | md5sum | cut -d' ' -f1) +``` + +**Usage:** +```bash +curl -X POST "http://localhost:8001/api/v2/chat/completions" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${token}" \ + -d '{ + "bot_id": "1624be71-5432-40bf-9758-f4aecffd4e9c", + "messages": [ + { + "role": "user", + "content": "Hello" + } + ] + }' +``` + +**Authentication Errors:** +- `401 Unauthorized`: Missing Authorization header +- `403 Forbidden`: Invalid authentication token + +## Response Format + +Returns the same response format as `/api/v1/chat/completions`: + +### Non-Streaming Response 
+```json +{ + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Response content here" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30 + } +} +``` + +### Streaming Response +Returns Server-Sent Events (SSE) format compatible with OpenAI's streaming API. \ No newline at end of file diff --git a/fastapi_app.py b/fastapi_app.py index b15dda0..bf6551c 100644 --- a/fastapi_app.py +++ b/fastapi_app.py @@ -3,6 +3,9 @@ import os import tempfile import shutil import uuid +import hashlib +import requests +import aiohttp from typing import AsyncGenerator, Dict, List, Optional, Union, Any from datetime import datetime @@ -36,6 +39,9 @@ from utils import ( get_global_agent_manager, init_global_agent_manager ) +# Import ChatRequestV2 directly from api_models +from utils.api_models import ChatRequestV2 + # Import modified_assistant from modified_assistant import update_agent_llm @@ -122,14 +128,14 @@ app.add_middleware( # Models are now imported from utils module -async def generate_stream_response(agent, messages, request) -> AsyncGenerator[str, None]: +async def generate_stream_response(agent, messages, tool_response: bool, model: str) -> AsyncGenerator[str, None]: """生成流式响应""" accumulated_content = "" chunk_id = 0 try: for response in agent.run(messages=messages): previous_content = accumulated_content - accumulated_content = get_content_from_messages(response, tool_response=request.tool_response) + accumulated_content = get_content_from_messages(response, tool_response=tool_response) # 计算新增的内容 if accumulated_content.startswith(previous_content): @@ -146,7 +152,7 @@ async def generate_stream_response(agent, messages, request) -> AsyncGenerator[s "id": f"chatcmpl-{chunk_id}", "object": "chat.completion.chunk", "created": int(__import__('time').time()), - "model": request.model, + "model": model, "choices": [{ "index": 0, "delta": { @@ -163,7 +169,7 @@ async def 
generate_stream_response(agent, messages, request) -> AsyncGenerator[s "id": f"chatcmpl-{chunk_id + 1}", "object": "chat.completion.chunk", "created": int(__import__('time').time()), - "model": request.model, + "model": model, "choices": [{ "index": 0, "delta": {}, @@ -441,107 +447,40 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] = {"dataset_ids": ["project-123", "project-456"], "bot_id": "my-bot-002"} """ try: - # 从Authorization header中提取API key - api_key = None - if authorization: - # 移除 "Bearer " 前缀 - if authorization.startswith("Bearer "): - api_key = authorization[7:] - else: - api_key = authorization + # v1接口:从Authorization header中提取API key作为模型API密钥 + api_key = extract_api_key_from_auth(authorization) # 获取bot_id(必需参数) bot_id = request.bot_id if not bot_id: raise HTTPException(status_code=400, detail="bot_id is required") - # 获取dataset_ids(可选参数,当提供时必须是数组) - dataset_ids_list = request.dataset_ids - project_dir = None - - # 只有当提供了 dataset_ids 时才创建机器人目录并合并数据 - if dataset_ids_list and len(dataset_ids_list) > 0: - from utils.multi_project_manager import create_robot_project - project_dir = create_robot_project(dataset_ids_list, bot_id) + # 创建项目目录(如果有dataset_ids) + project_dir = create_project_directory(request.dataset_ids, bot_id) # 收集额外参数作为 generate_cfg exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id'} generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields} - # 从全局管理器获取或创建助手实例(配置读取逻辑已在agent_manager内部处理) - agent = await agent_manager.get_or_create_agent( + # 处理消息 + messages = process_messages(request.messages, request.language) + + # 调用公共的agent创建和响应生成逻辑 + return await create_agent_and_generate_response( bot_id=bot_id, - project_dir=project_dir, - model_name=request.model, api_key=api_key, + messages=messages, + stream=request.stream, + tool_response=request.tool_response, + 
model_name=request.model, model_server=request.model_server, - generate_cfg=generate_cfg, language=request.language, system_prompt=request.system_prompt, mcp_settings=request.mcp_settings, - robot_type=request.robot_type + robot_type=request.robot_type, + project_dir=project_dir, + generate_cfg=generate_cfg ) - # 构建包含项目信息的消息上下文 - messages = [] - for msg in request.messages: - if msg.role == "assistant": - # 对assistant消息进行[ANSWER]分割处理,只保留最后一段 - content_parts = msg.content.split("[ANSWER]") - if content_parts: - # 取最后一段非空文本 - last_part = content_parts[-1].strip() - messages.append({"role": msg.role, "content": last_part}) - else: - messages.append({"role": msg.role, "content": msg.content}) - else: - messages.append({"role": msg.role, "content": msg.content}) - - # 在最后一条消息的末尾追加回复语言 - if messages and request.language: - language_map = { - 'zh': '请用中文回复', - 'en': 'Please reply in English', - 'ja': '日本語で回答してください', - 'jp': '日本語で回答してください' - } - language_instruction = language_map.get(request.language.lower(), '') - if language_instruction: - # 在最后一条消息末尾追加语言指令 - messages[-1]['content'] = messages[-1]['content'] + f"\n\n{language_instruction}。" - - # 根据stream参数决定返回流式还是非流式响应 - if request.stream: - return StreamingResponse( - generate_stream_response(agent, messages, request), - media_type="text/event-stream", - headers={"Cache-Control": "no-cache", "Connection": "keep-alive"} - ) - else: - # 非流式响应 - final_responses = agent.run_nonstream(messages) - - if final_responses and len(final_responses) > 0: - # 使用 get_content_from_messages 处理响应,支持 tool_response 参数 - content = get_content_from_messages(final_responses, tool_response=request.tool_response) - - # 构造OpenAI格式的响应 - return ChatResponse( - choices=[{ - "index": 0, - "message": { - "role": "assistant", - "content": content - }, - "finish_reason": "stop" - }], - usage={ - "prompt_tokens": sum(len(msg.content) for msg in request.messages), - "completion_tokens": len(content), - "total_tokens": sum(len(msg.content) for msg in 
async def fetch_bot_config(bot_id: str) -> Dict[str, Any]:
    """Fetch a robot's configuration from the backend API.

    Authenticates with the v2 token (see ``generate_v2_auth_token``) and
    returns the ``data`` payload of the backend's JSON response.

    Args:
        bot_id: Target robot ID.

    Returns:
        The configuration dict (empty dict when the backend returns no data).

    Raises:
        HTTPException: 400 when the backend returns a non-200 status or
            reports ``success == False``; 500 on connection errors or any
            other unexpected failure.
    """
    try:
        backend_host = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
        url = f"{backend_host}/v1/agent_bot_config/{bot_id}"

        auth_token = generate_v2_auth_token(bot_id)
        headers = {
            "content-type": "application/json",
            "authorization": f"Bearer {auth_token}"
        }
        # SECURITY: never print/log `headers` here -- it carries the bearer token.
        # aiohttp expects a ClientTimeout object, not a bare number.
        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, headers=headers) as response:
                if response.status != 200:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Failed to fetch bot config: API returned status code {response.status}"
                    )

                response_data = await response.json()

                if not response_data.get("success"):
                    raise HTTPException(
                        status_code=400,
                        detail=f"Failed to fetch bot config: {response_data.get('message', 'Unknown error')}"
                    )

                return response_data.get("data", {})

    except aiohttp.ClientError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to connect to backend API: {str(e)}"
        )
    except Exception as e:
        if isinstance(e, HTTPException):
            raise
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch bot config: {str(e)}"
        )


def process_messages(messages: List[Message], language: Optional[str] = None) -> List[Dict[str, str]]:
    """Normalize chat messages for the agent.

    Assistant messages are split on the "[ANSWER]" marker and only the last
    segment is kept; when ``language`` is given, a reply-language instruction
    is appended to the final message.

    Args:
        messages: Incoming conversation messages (role/content objects).
        language: Optional language code ('zh', 'en', 'ja'/'jp').

    Returns:
        A list of plain ``{"role": ..., "content": ...}`` dicts.
    """
    processed_messages: List[Dict[str, str]] = []

    for msg in messages:
        if msg.role == "assistant":
            # Keep only the last [ANSWER] segment of assistant turns.
            content_parts = msg.content.split("[ANSWER]")
            if content_parts:
                last_part = content_parts[-1].strip()
                processed_messages.append({"role": msg.role, "content": last_part})
            else:
                processed_messages.append({"role": msg.role, "content": msg.content})
        else:
            processed_messages.append({"role": msg.role, "content": msg.content})

    # Append the reply-language instruction to the last message, if any.
    if processed_messages and language:
        language_map = {
            'zh': '请用中文回复',
            'en': 'Please reply in English',
            'ja': '日本語で回答してください',
            'jp': '日本語で回答してください'
        }
        language_instruction = language_map.get(language.lower(), '')
        if language_instruction:
            processed_messages[-1]['content'] = processed_messages[-1]['content'] + f"\n\n{language_instruction}。"

    return processed_messages


async def create_agent_and_generate_response(
    bot_id: str,
    api_key: str,
    messages: List[Dict[str, str]],
    stream: bool,
    tool_response: bool,
    model_name: str,
    model_server: str,
    language: str,
    system_prompt: Optional[str],
    mcp_settings: Optional[List[Dict]],
    robot_type: str,
    project_dir: Optional[str] = None,
    generate_cfg: Optional[Dict] = None
) -> Union[ChatResponse, StreamingResponse]:
    """Shared v1/v2 logic: obtain the agent and produce a chat response.

    Returns a StreamingResponse (SSE) when ``stream`` is true, otherwise an
    OpenAI-style ChatResponse.

    Raises:
        HTTPException: 500 when the agent yields no response.
    """
    if generate_cfg is None:
        generate_cfg = {}

    # Obtain (or create) the assistant instance from the global manager;
    # config-reading details are handled inside agent_manager.
    agent = await agent_manager.get_or_create_agent(
        bot_id=bot_id,
        project_dir=project_dir,
        model_name=model_name,
        api_key=api_key,
        model_server=model_server,
        generate_cfg=generate_cfg,
        language=language,
        system_prompt=system_prompt,
        mcp_settings=mcp_settings,
        robot_type=robot_type
    )

    if stream:
        return StreamingResponse(
            generate_stream_response(agent, messages, tool_response, model_name),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
        )

    # Non-streaming path.
    final_responses = agent.run_nonstream(messages)

    if final_responses and len(final_responses) > 0:
        # get_content_from_messages honors the tool_response flag.
        content = get_content_from_messages(final_responses, tool_response=tool_response)

        # OpenAI-compatible envelope; "token" counts here are character
        # counts, mirroring the v1 behavior -- TODO confirm this approximation.
        return ChatResponse(
            choices=[{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": content
                },
                "finish_reason": "stop"
            }],
            usage={
                "prompt_tokens": sum(len(msg.get("content", "")) for msg in messages),
                "completion_tokens": len(content),
                "total_tokens": sum(len(msg.get("content", "")) for msg in messages) + len(content)
            }
        )
    raise HTTPException(status_code=500, detail="No response from agent")


def create_project_directory(dataset_ids: List[str], bot_id: str) -> Optional[str]:
    """Create the robot's merged project directory (best-effort).

    Returns the directory path, or None when no dataset_ids were given or
    creation failed -- a missing project dir degrades features but must not 500.
    """
    if not dataset_ids:
        return None

    try:
        from utils.multi_project_manager import create_robot_project
        return create_robot_project(dataset_ids, bot_id)
    except Exception as e:
        print(f"Error creating project directory: {e}")
        return None


def extract_api_key_from_auth(authorization: Optional[str]) -> Optional[str]:
    """Return the credential from an Authorization header, stripping a "Bearer " prefix."""
    if not authorization:
        return None
    if authorization.startswith("Bearer "):
        return authorization[7:]
    return authorization


def generate_v2_auth_token(bot_id: str) -> str:
    """Return the v2 auth token: MD5("{MASTERKEY}:{bot_id}") as lowercase hex.

    NOTE(review): an MD5 of a shared master key is a weak scheme; it is kept
    as-is because the backend and mcp/rag_retrieve_server.py derive the same
    token -- changing it requires a coordinated update.
    """
    masterkey = os.getenv("MASTERKEY", "master")
    token_input = f"{masterkey}:{bot_id}"
    return hashlib.md5(token_input.encode()).hexdigest()


@app.post("/api/v2/chat/completions")
async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[str] = Header(None)):
    """Chat completions API v2 with simplified parameters.

    Only requires bot_id and messages (plus optional stream, tool_response,
    language); all other configuration is fetched from the backend bot
    configuration API.

    Args:
        request: ChatRequestV2 containing only essential parameters.
        authorization: Bearer token, MD5("{MASTERKEY}:{bot_id}") -- note this
            differs from v1, where the header carries the model API key.

    Returns:
        Union[ChatResponse, StreamingResponse]: completion or SSE stream.

    Raises:
        HTTPException: 400 missing bot_id, 401 missing Authorization,
            403 invalid token, 500 on internal errors.
    """
    try:
        bot_id = request.bot_id
        if not bot_id:
            raise HTTPException(status_code=400, detail="bot_id is required")

        # v2 authentication check.
        expected_token = generate_v2_auth_token(bot_id)
        provided_token = extract_api_key_from_auth(authorization)

        if not provided_token:
            raise HTTPException(
                status_code=401,
                detail="Authorization header is required for v2 API"
            )

        import hmac  # local import: constant-time comparison for credentials
        if not hmac.compare_digest(provided_token, expected_token):
            # SECURITY: do not echo any part of either token back to the
            # caller -- leaking a prefix of the expected token aids guessing.
            raise HTTPException(
                status_code=403,
                detail="Invalid authentication token"
            )

        # Fetch the robot configuration from the backend (v2 auth scheme).
        bot_config = await fetch_bot_config(bot_id)

        # v2: the model API key comes from the backend config; the
        # Authorization header was already consumed for authentication.
        api_key = bot_config.get("api_key")

        # Project directory from backend-provided dataset_ids.
        project_dir = create_project_directory(bot_config.get("dataset_ids", []), bot_id)

        messages = process_messages(request.messages, request.language)

        return await create_agent_and_generate_response(
            bot_id=bot_id,
            api_key=api_key,
            messages=messages,
            stream=request.stream,
            tool_response=request.tool_response,
            model_name=bot_config.get("model", "qwen/qwen3-next-80b-a3b-instruct"),
            model_server=bot_config.get("model_server", ""),
            language=request.language or bot_config.get("language", "ja"),
            system_prompt=bot_config.get("system_prompt"),
            mcp_settings=bot_config.get("mcp_settings", []),
            robot_type=bot_config.get("robot_type", "agent"),
            project_dir=project_dir,
            generate_cfg={}  # v2 does not forward extra generation parameters
        )

    except HTTPException:
        raise
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in chat_completions_v2: {str(e)}")
        print(f"Full traceback: {error_details}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
class ChatRequestV2(BaseModel):
    """Request body for the simplified v2 chat-completions endpoint.

    Only the essential fields are accepted; model, server, prompt and MCP
    settings are resolved from the backend bot configuration instead of
    being supplied by the caller.
    """

    # Conversation history.
    messages: List[Message]
    # When true, respond as an SSE stream.
    stream: Optional[bool] = False
    # When true, include tool output in the assistant content.
    tool_response: Optional[bool] = False
    # Target robot ID (required).
    bot_id: str
    # Reply-language hint; defaults to Japanese.
    language: Optional[str] = "ja"