diff --git a/ZIP_PROJECT_README.md b/ZIP_PROJECT_README.md new file mode 100644 index 0000000..29c2246 --- /dev/null +++ b/ZIP_PROJECT_README.md @@ -0,0 +1,157 @@ +# ZIP项目功能说明 + +## 概述 + +此功能实现了完全无状态的项目管理,用户必须在chat接口请求体的最外层提供`zip_url`参数来动态加载项目数据。系统不再支持传统的project_registry.json配置方式。 + +## 功能特性 + +- **极简无状态项目加载**: 只需提供ZIP URL,系统自动处理所有逻辑 +- **自动缓存**: 相同URL的ZIP文件只会下载一次,提高性能 +- **智能解压**: 自动将ZIP文件解压到项目目录,保持原始结构 +- **自动项目标识**: 基于URL哈希自动生成唯一项目标识 + +## API使用方法 + +### Chat接口 + +直接在请求体中使用最外层参数: + +```json +{ + "messages": [ + { + "role": "user", + "content": "请分析项目中的数据文件" + } + ], + "model": "qwen3-next", + "model_server": "https://openrouter.ai/api/v1", + "zip_url": "https://example.com/my-project.zip", + "stream": false +} +``` + +### 参数说明 + +- `model_server`: 模型服务器地址(可选) +- `zip_url`: ZIP文件的下载链接(必需) +- `extra`: 其他额外参数(可选) + +### 系统管理接口 + +#### 清理缓存 +```bash +POST /system/cleanup-cache +``` +清理所有下载的ZIP文件缓存。 + +#### 系统状态 +```bash +GET /system/status +``` +获取系统状态信息,包括agent池状态。 + +## 工作流程 + +1. **参数验证**: 检查是否提供了必需的zip_url参数 +2. **模型配置**: 如果提供了model_server,将其配置到LLM +3. **生成项目标识**: 基于URL哈希自动生成唯一项目标识 +4. **下载ZIP**: 系统根据zip_url下载ZIP文件到缓存目录 +5. **缓存检查**: 如果URL已被缓存,直接使用缓存文件 +6. **解压文件**: 将ZIP文件解压到`projects/{url_hash}/`目录,保持原始目录结构 +7. **项目访问**: Agent可以直接访问解压后的所有文件和目录 + +## 缓存机制 + +- ZIP文件基于URL的MD5哈希值(取前16位十六进制字符)进行缓存 +- 缓存位置: `projects/_cache/` +- 项目目录: `projects/{url_hash}/` +- 相同URL不会重复下载,提高性能 + +## 目录结构 + +``` +projects/ +├── _cache/ # ZIP文件缓存 +│ ├── abc123.zip # 基于URL哈希的ZIP文件 +│ └── def456.zip +├── abc123/ # 解压后的项目目录(URL哈希) +│ ├── 原始文件和目录结构... +│ └── 保持ZIP中的完整结构 +└── def456/ + └── 原始文件和目录结构... +``` + +## 错误处理 + +- 缺少zip_url: 返回400错误 +- 无效URL: 返回400错误 +- 下载失败: 返回400错误 +- 解压失败: 返回400错误 + +## 测试 + +运行测试脚本验证功能: + +```bash +python test_zip_feature.py +``` + +## 注意事项 + +1. **必需参数**: 所有请求都必须提供zip_url参数 +2. **可选参数**: model_server参数可选,用于指定自定义模型服务器 +3. **URL格式**: zip_url必须是有效的HTTP/HTTPS URL +4. **文件大小**: 建议ZIP文件不超过100MB +5. **安全性**: 确保ZIP文件来源可信 +6. **网络**: 需要能够访问zip_url指向的资源 +7. 
**自动标识**: 系统自动基于URL生成项目标识,无需手动指定 + +## 示例使用场景 + +### 1. 临时项目分析 +```python +import requests + +response = requests.post("http://localhost:8000/chat/completions", json={ + "messages": [{"role": "user", "content": "分析这个数据集"}], + "model_server": "https://openrouter.ai/api/v1", + "zip_url": "https://dataset.example.com/analysis-data.zip" +}) +``` + +### 2. 多项目对比 +```python +# 项目1 +response1 = requests.post("http://localhost:8000/chat/completions", json={ + "messages": [{"role": "user", "content": "总结项目1的特点"}], + "model_server": "https://openrouter.ai/api/v1", + "zip_url": "https://data.example.com/project1.zip" +}) + +# 项目2 +response2 = requests.post("http://localhost:8000/chat/completions", json={ + "messages": [{"role": "user", "content": "总结项目2的特点"}], + "model_server": "https://openrouter.ai/api/v1", + "zip_url": "https://data.example.com/project2.zip" +}) +``` + +### 3. 使用默认模型服务器 +```python +# 不指定model_server,使用默认配置 +response = requests.post("http://localhost:8000/chat/completions", json={ + "messages": [{"role": "user", "content": "分析项目数据"}], + "zip_url": "https://data.example.com/project.zip" +}) +``` + +## 技术实现 + +- **下载**: 使用requests库流式下载 +- **解压**: 使用zipfile模块 +- **缓存**: 基于URL哈希的文件缓存 +- **并发安全**: 支持多并发请求处理 + +这个功能实现了极简的无状态项目管理,用户只需在最外层提供model_server和zip_url参数,系统会自动处理模型配置、项目标识生成、下载、解压和缓存,最大程度简化了项目管理的复杂度。 \ No newline at end of file diff --git a/__pycache__/fastapi_app.cpython-312.pyc b/__pycache__/fastapi_app.cpython-312.pyc index 2a1a252..d34da56 100644 Binary files a/__pycache__/fastapi_app.cpython-312.pyc and b/__pycache__/fastapi_app.cpython-312.pyc differ diff --git a/__pycache__/gbase_agent.cpython-312.pyc b/__pycache__/gbase_agent.cpython-312.pyc index 3b844b9..4d01082 100644 Binary files a/__pycache__/gbase_agent.cpython-312.pyc and b/__pycache__/gbase_agent.cpython-312.pyc differ diff --git a/__pycache__/test_zip_feature.cpython-312.pyc b/__pycache__/test_zip_feature.cpython-312.pyc new file mode 100644 index 0000000..9af629f Binary files /dev/null and 
b/__pycache__/test_zip_feature.cpython-312.pyc differ diff --git a/__pycache__/zip_project_handler.cpython-312.pyc b/__pycache__/zip_project_handler.cpython-312.pyc new file mode 100644 index 0000000..21236b0 Binary files /dev/null and b/__pycache__/zip_project_handler.cpython-312.pyc differ diff --git a/fastapi_app.py b/fastapi_app.py index 5c16e4a..4281e08 100644 --- a/fastapi_app.py +++ b/fastapi_app.py @@ -40,7 +40,7 @@ def get_content_from_messages(messages: List[dict]) -> str: from agent_pool import (get_agent_from_pool, init_global_agent_pool, release_agent_to_pool) from gbase_agent import init_agent_service_universal, update_agent_llm -from project_config import project_manager +from zip_project_handler import zip_handler app = FastAPI(title="Database Assistant API", version="1.0.0") @@ -57,6 +57,8 @@ class ChatRequest(BaseModel): messages: List[Message] model: str = "qwen3-next" api_key: Optional[str] = None + model_server: Optional[str] = None + zip_url: Optional[str] = None extra: Optional[Dict] = None stream: Optional[bool] = False file_url: Optional[str] = None @@ -154,35 +156,30 @@ async def chat_completions(request: ChatRequest): """ agent = None try: - # 从extra字段中获取project_id - if not request.extra or 'project_id' not in request.extra: - raise HTTPException(status_code=400, detail="project_id is required in extra field") + # 从最外层获取zip_url参数 + zip_url = request.zip_url - project_id = request.extra['project_id'] + if not zip_url: + raise HTTPException(status_code=400, detail="zip_url is required") - # 验证项目访问权限 - if not project_manager.validate_project_access(project_id): - raise HTTPException(status_code=404, detail=f"Project {project_id} not found or inactive") - - # 获取项目数据目录 - project_dir = project_manager.get_project_dir(project_id) + # 使用ZIP URL获取项目数据 + print(f"从ZIP URL加载项目: {zip_url}") + project_dir = zip_handler.get_project_from_zip(zip_url) + if not project_dir: + raise HTTPException(status_code=400, detail=f"Failed to load project from ZIP 
URL: {zip_url}") # 从实例池获取助手实例 agent = await get_agent_from_pool(timeout=30.0) - # 准备LLM配置,从extra字段中移除project_id - llm_extra = request.extra.copy() if request.extra else {} - llm_extra.pop('project_id', None) # 移除project_id,不传递给LLM - - # 动态设置请求的模型,支持从接口传入api_key和extra参数 - update_agent_llm(agent, request.model, request.api_key, llm_extra) + # 动态设置请求的模型,支持从接口传入api_key、model_server和extra参数 + update_agent_llm(agent, request.model, request.api_key, request.model_server) # 构建包含项目信息的消息上下文 messages = [ # 项目信息系统消息 { "role": "user", - "content": f"当前项目ID: {project_id},数据目录: {project_dir}。所有文件路径中的 '[当前数据目录]' 请替换为: {project_dir}" + "content": f"当前项目来自ZIP URL: {zip_url},项目目录: {project_dir}。所有文件路径中的 '[当前数据目录]' 请替换为: {project_dir}" }, # 用户消息批量转换 *[{"role": msg.role, "content": msg.content} for msg in request.messages] @@ -270,6 +267,16 @@ async def system_status(): } +@app.post("/system/cleanup-cache") +async def cleanup_cache(): + """清理ZIP文件缓存""" + try: + zip_handler.cleanup_cache() + return {"message": "缓存清理成功"} + except Exception as e: + raise HTTPException(status_code=500, detail=f"缓存清理失败: {str(e)}") + + @app.on_event("startup") async def startup_event(): """应用启动时初始化助手实例池""" diff --git a/gbase_agent.py b/gbase_agent.py index f682094..ebab503 100644 --- a/gbase_agent.py +++ b/gbase_agent.py @@ -139,24 +139,22 @@ def init_agent_service_universal(): return bot -def update_agent_llm(agent, model_name: str, api_key: str = None, extra: Dict = None): +def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None): """动态更新助手实例的LLM,支持从接口传入参数""" # 获取基础配置 llm_config = { "model": model_name, "api_key": api_key, + "model_server": model_server } - # 如果接口传入了extra参数,则合并到配置中 - if extra is not None: - llm_config.update(extra) # 创建LLM实例,确保不是字典 - if "llm_class" in llm_config: - llm_instance = llm_config.get("llm_class", TextChatAtOAI)(llm_config) - else: - # 使用默认的 TextChatAtOAI 类 - llm_instance = TextChatAtOAI(llm_config) + #if "llm_class" in llm_config: + # 
llm_instance = llm_config.get("llm_class", TextChatAtOAI)(llm_config) + #else: + # 使用默认的 TextChatAtOAI 类 + llm_instance = TextChatAtOAI(llm_config) # 动态设置LLM agent.llm = llm_instance diff --git a/project_config.py b/project_config.py deleted file mode 100644 index 0277cd3..0000000 --- a/project_config.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 -""" -项目配置管理系统 -负责管理项目ID到数据目录的映射,以及项目访问权限控制 -""" - -import json -import os -from typing import Dict, Optional, List -from dataclasses import dataclass, asdict - - -@dataclass -class ProjectConfig: - """项目配置数据类""" - project_id: str - data_dir: str - name: str - description: str = "" - allowed_file_types: List[str] = None - max_file_size_mb: int = 100 - is_active: bool = True - - def __post_init__(self): - if self.allowed_file_types is None: - self.allowed_file_types = [".json", ".txt", ".csv", ".pdf"] - - -class ProjectManager: - """项目管理器""" - - def __init__(self, config_file: str = "./projects/project_registry.json"): - self.config_file = config_file - self.projects: Dict[str, ProjectConfig] = {} - self._ensure_config_dir() - self._load_projects() - - def _ensure_config_dir(self): - """确保配置目录存在""" - config_dir = os.path.dirname(self.config_file) - if not os.path.exists(config_dir): - os.makedirs(config_dir, exist_ok=True) - - def _load_projects(self): - """从配置文件加载项目""" - if os.path.exists(self.config_file): - try: - with open(self.config_file, 'r', encoding='utf-8') as f: - data = json.load(f) - for project_data in data.get('projects', []): - config = ProjectConfig(**project_data) - self.projects[config.project_id] = config - except Exception as e: - print(f"加载项目配置失败: {e}") - self._create_default_config() - else: - self._create_default_config() - - def _create_default_config(self): - """创建默认配置""" - default_project = ProjectConfig( - project_id="default", - data_dir="./data", - name="默认项目", - description="默认数据项目" - ) - self.projects["default"] = default_project - self._save_projects() - - def 
_save_projects(self): - """保存项目配置到文件""" - data = { - "projects": [asdict(project) for project in self.projects.values()] - } - try: - with open(self.config_file, 'w', encoding='utf-8') as f: - json.dump(data, f, ensure_ascii=False, indent=2) - except Exception as e: - print(f"保存项目配置失败: {e}") - - def get_project(self, project_id: str) -> Optional[ProjectConfig]: - """获取项目配置""" - return self.projects.get(project_id) - - def add_project(self, config: ProjectConfig) -> bool: - """添加项目""" - if config.project_id in self.projects: - return False - - # 确保数据目录存在 - if not os.path.isabs(config.data_dir): - config.data_dir = os.path.abspath(config.data_dir) - - os.makedirs(config.data_dir, exist_ok=True) - - self.projects[config.project_id] = config - self._save_projects() - return True - - def update_project(self, project_id: str, **kwargs) -> bool: - """更新项目配置""" - if project_id not in self.projects: - return False - - project = self.projects[project_id] - for key, value in kwargs.items(): - if hasattr(project, key): - setattr(project, key, value) - - self._save_projects() - return True - - def delete_project(self, project_id: str) -> bool: - """删除项目""" - if project_id not in self.projects: - return False - - del self.projects[project_id] - self._save_projects() - return True - - def list_projects(self) -> List[ProjectConfig]: - """列出所有项目""" - return list(self.projects.values()) - - def get_project_dir(self, project_id: str) -> str: - """获取项目数据目录""" - project = self.get_project(project_id) - if project: - return project.data_dir - - # 如果项目不存在,创建默认目录结构 - default_dir = f"./projects/{project_id}/data" - os.makedirs(default_dir, exist_ok=True) - - # 自动创建新项目配置 - new_project = ProjectConfig( - project_id=project_id, - data_dir=default_dir, - name=f"项目 {project_id}", - description=f"自动创建的项目 {project_id}" - ) - self.add_project(new_project) - - return default_dir - - def validate_project_access(self, project_id: str) -> bool: - """验证项目访问权限""" - project = self.get_project(project_id) 
- return project and project.is_active - - -# 全局项目管理器实例 -project_manager = ProjectManager() \ No newline at end of file diff --git a/projects/project_registry.json b/project_registry.json similarity index 100% rename from projects/project_registry.json rename to project_registry.json diff --git a/projects/demo-project/all_hp_product_spec_book2506/document.txt b/projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/document.txt similarity index 100% rename from projects/demo-project/all_hp_product_spec_book2506/document.txt rename to projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/document.txt diff --git a/projects/demo-project/all_hp_product_spec_book2506/schema.json b/projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/schema.json similarity index 100% rename from projects/demo-project/all_hp_product_spec_book2506/schema.json rename to projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/schema.json diff --git a/projects/demo-project/all_hp_product_spec_book2506/serialization.txt b/projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/serialization.txt similarity index 100% rename from projects/demo-project/all_hp_product_spec_book2506/serialization.txt rename to projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/serialization.txt diff --git a/projects/_cache/7f2fdcb1bad17323.zip b/projects/_cache/7f2fdcb1bad17323.zip new file mode 100644 index 0000000..d089722 Binary files /dev/null and b/projects/_cache/7f2fdcb1bad17323.zip differ diff --git a/test_zip_feature.py b/test_zip_feature.py new file mode 100644 index 0000000..2d26f61 --- /dev/null +++ b/test_zip_feature.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +""" +测试ZIP URL功能的脚本 +""" + +import requests +import json +import os + + +def test_missing_zip_url(): + """测试缺少zip_url参数的错误处理""" + + base_url = "http://localhost:8000" + + print("测试缺少zip_url参数...") + + # 缺少zip_url的请求 + test_request = { + "messages": [ + { + "role": "user", + "content": "测试请求" + } + ], + "model": "qwen3-next", + # 
缺少zip_url参数 + "stream": False + } + + try: + response = requests.post( + f"{base_url}/chat/completions", + json=test_request, + timeout=10 + ) + + if response.status_code == 400: + print("✅ 正确返回400错误(缺少zip_url)") + print(f"错误信息: {response.json()}") + else: + print(f"❌ 预期400错误,实际得到: {response.status_code}") + + except Exception as e: + print(f"❌ 测试失败: {e}") + + +def test_zip_project_feature(): + """测试ZIP项目功能""" + + # API基础URL + base_url = "http://localhost:8000" + + print("测试ZIP项目功能...") + + # 测试数据 - 使用一个示例ZIP文件URL + test_request = { + "messages": [ + { + "role": "user", + "content": "请列出项目目录中的文件" + } + ], + "model": "qwen3-next", + "model_server": "https://openrouter.ai/api/v1", # 示例model_server + "zip_url": "https://example.com/test-project.zip", # 示例URL,需要替换 + "stream": False + } + + try: + print("发送测试请求...") + response = requests.post( + f"{base_url}/chat/completions", + json=test_request, + timeout=30 + ) + + print(f"响应状态码: {response.status_code}") + + if response.status_code == 200: + print("✅ 请求成功") + result = response.json() + print(f"响应内容: {json.dumps(result, indent=2, ensure_ascii=False)}") + else: + print("❌ 请求失败") + print(f"错误信息: {response.text}") + + except requests.exceptions.ConnectionError: + print("❌ 连接失败,请确保API服务正在运行") + except Exception as e: + print(f"❌ 测试失败: {e}") + + +def test_cache_cleanup(): + """测试缓存清理功能""" + + base_url = "http://localhost:8000" + + try: + print("测试缓存清理...") + response = requests.post(f"{base_url}/system/cleanup-cache") + + if response.status_code == 200: + print("✅ 缓存清理成功") + print(f"响应: {response.json()}") + else: + print("❌ 缓存清理失败") + print(f"错误信息: {response.text}") + + except Exception as e: + print(f"❌ 缓存清理测试失败: {e}") + + +def test_system_status(): + """测试系统状态""" + + base_url = "http://localhost:8000" + + try: + print("获取系统状态...") + response = requests.get(f"{base_url}/system/status") + + if response.status_code == 200: + print("✅ 系统状态获取成功") + status = response.json() + print(f"系统状态: {json.dumps(status, indent=2, 
#!/usr/bin/env python3
"""
ZIP project handler.

Downloads a ZIP archive from a URL and extracts it into a per-URL project
directory, caching both the archive and the extracted tree on disk.
"""

import hashlib
import os
import shutil
import tempfile
import zipfile
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse


class ZipProjectHandler:
    """Turn a ZIP URL into a local, extracted project directory.

    On-disk layout under ``projects_dir``:

    * ``_cache/<url_hash>.zip`` -- the downloaded archive
    * ``<url_hash>/``           -- the extracted project tree

    Both the archive and the extracted tree are published with an atomic
    rename from a scratch sibling, so a path that exists is always complete:
    an interrupted download or extraction can no longer poison the cache.

    NOTE(review): ``zip_url`` comes straight from the request body, so this
    class makes the server fetch arbitrary HTTP(S) URLs and unpack archives
    of arbitrary size. Consider an allow-list and a size cap at the caller.
    """

    # Schemes the downloader is meant to handle; anything else is rejected
    # up front instead of failing later inside the HTTP client.
    _ALLOWED_SCHEMES = ("http", "https")

    def __init__(self, projects_dir: str = "./projects"):
        """Create the projects directory and its ``_cache`` subdirectory.

        Args:
            projects_dir: Root directory for extracted projects and the cache.
        """
        self.projects_dir = Path(projects_dir).resolve()
        # parents=True: a missing intermediate directory used to raise here.
        self.projects_dir.mkdir(parents=True, exist_ok=True)
        self.cache_dir = self.projects_dir / "_cache"
        self.cache_dir.mkdir(exist_ok=True)

    @staticmethod
    def _scratch_sibling(final_path: Path) -> Path:
        """Return a unique hidden path next to *final_path* for staging work.

        Staying in the same directory keeps the final ``os.replace`` a plain
        rename on one filesystem.
        """
        return final_path.with_name(
            f".{final_path.name}.{os.urandom(4).hex()}.tmp"
        )

    def _get_url_hash(self, url: str) -> str:
        """Return the first 16 hex chars of the URL's MD5, used as cache key.

        MD5 is used only to derive a stable directory name, not for security.
        """
        return hashlib.md5(url.encode('utf-8')).hexdigest()[:16]

    def _is_valid_url(self, url: str) -> bool:
        """Return True only for ``http``/``https`` URLs that have a host part."""
        if not isinstance(url, str):
            return False
        try:
            parsed = urlparse(url)
        except ValueError:
            # urlparse raises ValueError on e.g. a malformed IPv6 host.
            return False
        return parsed.scheme in self._ALLOWED_SCHEMES and bool(parsed.netloc)

    def _download_file(self, url: str, local_path: str) -> bool:
        """Stream *url* to *local_path*; return True on success.

        The body is written to a scratch file and renamed into place only
        after the whole response has been received, so *local_path* never
        holds a truncated download.
        """
        # Third-party HTTP client; only this method needs it.
        import requests

        target = Path(local_path)
        scratch = self._scratch_sibling(target)
        try:
            # The context manager releases the connection even on failure.
            with requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                with open(scratch, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            os.replace(scratch, target)
            return True
        except Exception as e:
            # Best-effort boundary: callers only need success/failure.
            print(f"下载文件失败: {e}")
            try:
                scratch.unlink()
            except OSError:
                pass
            return False

    def _extract_zip(self, zip_path: str, extract_to: str) -> bool:
        """Extract *zip_path* so that *extract_to* appears only when complete.

        Files are unpacked into a scratch sibling directory which is then
        renamed to *extract_to*. The rename fails (and False is returned) if
        *extract_to* already exists as a non-empty directory.

        ``ZipFile.extractall`` itself strips absolute paths and ``..``
        components from member names, per the standard-library documentation.
        """
        target = Path(extract_to)
        staging = self._scratch_sibling(target)
        try:
            staging.mkdir(parents=True)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(staging)
            os.replace(staging, target)
            return True
        except Exception as e:
            print(f"解压ZIP文件失败: {e}")
            shutil.rmtree(staging, ignore_errors=True)
            return False

    def get_project_from_zip(self, zip_url: str) -> Optional[str]:
        """Return the extracted project directory for *zip_url*.

        Args:
            zip_url: HTTP(S) URL of the ZIP archive.

        Returns:
            The project directory path on success, ``None`` when the URL is
            invalid or the download/extraction fails.
        """
        if not self._is_valid_url(zip_url):
            print(f"无效的URL: {zip_url}")
            return None

        url_hash = self._get_url_hash(zip_url)
        cached_project_dir = self.projects_dir / url_hash

        # Fast path: this URL was already extracted.
        if cached_project_dir.exists():
            print(f"使用缓存的项目目录: {cached_project_dir}")
            return str(cached_project_dir)

        zip_path = self.cache_dir / f"{url_hash}.zip"

        if not zip_path.exists():
            print(f"下载ZIP文件: {zip_url}")
            # The cache directory may have been removed since start-up.
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            if not self._download_file(zip_url, str(zip_path)):
                return None
        else:
            print(f"使用缓存的ZIP文件: {zip_path}")

        print(f"解压ZIP文件到: {cached_project_dir}")
        if not self._extract_zip(str(zip_path), str(cached_project_dir)):
            if cached_project_dir.is_dir():
                # A concurrent request published the same project first.
                print(f"使用缓存的项目目录: {cached_project_dir}")
                return str(cached_project_dir)
            # Drop the archive so the next request downloads a fresh copy
            # instead of failing forever on a corrupt cached file.
            try:
                zip_path.unlink()
            except OSError:
                pass
            return None

        print(f"项目准备完成: {cached_project_dir}")
        return str(cached_project_dir)

    def cleanup_cache(self):
        """Delete every cached archive and recreate an empty cache directory.

        Extracted project directories are left untouched.

        Raises:
            Exception: Re-raised after logging when the cache cannot be
                cleared, so callers (e.g. the cleanup endpoint) can report
                the failure instead of claiming success.
        """
        try:
            if self.cache_dir.exists():
                shutil.rmtree(self.cache_dir)
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            print("缓存清理完成")
        except Exception as e:
            print(f"清理缓存失败: {e}")
            raise


# Global ZIP project handler instance
zip_handler = ZipProjectHandler()