add zip_url

This commit is contained in:
朱潮 2025-10-07 14:01:27 +08:00
parent 10c2ef0bbc
commit 58079f496a
15 changed files with 466 additions and 181 deletions

157
ZIP_PROJECT_README.md Normal file
View File

@ -0,0 +1,157 @@
# ZIP项目功能说明
## 概述
此功能实现了完全无状态的项目管理:用户必须在chat接口请求体的最外层提供`zip_url`参数来动态加载项目数据。系统不再支持传统的project_registry.json配置方式。
## 功能特性
- **极简无状态项目加载**: 只需提供ZIP URL,系统自动处理所有逻辑
- **自动缓存**: 相同URL的ZIP文件只会下载一次,提高性能
- **智能解压**: 自动将ZIP文件解压到项目目录,保持原始结构
- **自动项目标识**: 基于URL哈希自动生成唯一项目标识
## API使用方法
### Chat接口
直接在请求体中使用最外层参数:
```json
{
"messages": [
{
"role": "user",
"content": "请分析项目中的数据文件"
}
],
"model": "qwen3-next",
"model_server": "https://openrouter.ai/api/v1",
"zip_url": "https://example.com/my-project.zip",
"stream": false
}
```
### 参数说明
- `model_server`: 模型服务器地址(可选)
- `zip_url`: ZIP文件的下载链接(必需)
- `extra`: 其他额外参数(可选)
### 系统管理接口
#### 清理缓存
```bash
POST /system/cleanup-cache
```
清理所有下载的ZIP文件缓存。
#### 系统状态
```bash
GET /system/status
```
获取系统状态信息,包括agent池状态。
## 工作流程
1. **参数验证**: 检查是否提供了必需的zip_url参数
2. **模型配置**: 如果提供了model_server将其配置到LLM
3. **生成项目标识**: 基于URL哈希自动生成唯一项目标识
4. **下载ZIP**: 系统根据zip_url下载ZIP文件到缓存目录
5. **缓存检查**: 如果URL已被缓存直接使用缓存文件
6. **解压文件**: 将ZIP文件解压到`projects/{url_hash}/`目录,保持原始目录结构
7. **项目访问**: Agent可以直接访问解压后的所有文件和目录
## 缓存机制
- ZIP文件基于URL的MD5哈希值(取十六进制前16位)进行缓存
- 缓存位置: `projects/_cache/`
- 项目目录: `projects/{url_hash}/`
- 相同URL不会重复下载提高性能
## 目录结构
```
projects/
├── _cache/ # ZIP文件缓存
│ ├── abc123.zip # 基于URL哈希的ZIP文件
│ └── def456.zip
├── abc123/ # 解压后的项目目录URL哈希
│ ├── 原始文件和目录结构...
│ └── 保持ZIP中的完整结构
└── def456/
└── 原始文件和目录结构...
```
## 错误处理
- 缺少zip_url: 返回400错误
- 无效URL: 返回400错误
- 下载失败: 返回400错误
- 解压失败: 返回400错误
## 测试
运行测试脚本验证功能:
```bash
python test_zip_feature.py
```
## 注意事项
1. **必需参数**: 所有请求都必须提供zip_url参数
2. **可选参数**: model_server参数可选用于指定自定义模型服务器
3. **URL格式**: zip_url必须是有效的HTTP/HTTPS URL
4. **文件大小**: 建议ZIP文件不超过100MB
5. **安全性**: 确保ZIP文件来源可信
6. **网络**: 需要能够访问zip_url指向的资源
7. **自动标识**: 系统自动基于URL生成项目标识无需手动指定
## 示例使用场景
### 1. 临时项目分析
```python
import requests
response = requests.post("http://localhost:8000/chat/completions", json={
"messages": [{"role": "user", "content": "分析这个数据集"}],
"model_server": "https://openrouter.ai/api/v1",
"zip_url": "https://dataset.example.com/analysis-data.zip"
})
```
### 2. 多项目对比
```python
# 项目1
response1 = requests.post("http://localhost:8000/chat/completions", json={
"messages": [{"role": "user", "content": "总结项目1的特点"}],
"model_server": "https://openrouter.ai/api/v1",
"zip_url": "https://data.example.com/project1.zip"
})
# 项目2
response2 = requests.post("http://localhost:8000/chat/completions", json={
"messages": [{"role": "user", "content": "总结项目2的特点"}],
"model_server": "https://openrouter.ai/api/v1",
"zip_url": "https://data.example.com/project2.zip"
})
```
### 3. 使用默认模型服务器
```python
# 不指定model_server使用默认配置
response = requests.post("http://localhost:8000/chat/completions", json={
"messages": [{"role": "user", "content": "分析项目数据"}],
"zip_url": "https://data.example.com/project.zip"
})
```
## 技术实现
- **下载**: 使用requests库流式下载
- **解压**: 使用zipfile模块
- **缓存**: 基于URL哈希的文件缓存
- **并发安全**: 支持多并发请求处理
这个功能实现了极简的无状态项目管理:用户只需在最外层提供zip_url参数(model_server可选),系统会自动处理模型配置、项目标识生成、下载、解压和缓存,最大程度简化了项目管理的复杂度。

Binary file not shown.

Binary file not shown.

View File

@ -40,7 +40,7 @@ def get_content_from_messages(messages: List[dict]) -> str:
from agent_pool import (get_agent_from_pool, init_global_agent_pool,
release_agent_to_pool)
from gbase_agent import init_agent_service_universal, update_agent_llm
from project_config import project_manager
from zip_project_handler import zip_handler
app = FastAPI(title="Database Assistant API", version="1.0.0")
@ -57,6 +57,8 @@ class ChatRequest(BaseModel):
messages: List[Message]
model: str = "qwen3-next"
api_key: Optional[str] = None
model_server: Optional[str] = None
zip_url: Optional[str] = None
extra: Optional[Dict] = None
stream: Optional[bool] = False
file_url: Optional[str] = None
@ -154,35 +156,30 @@ async def chat_completions(request: ChatRequest):
"""
agent = None
try:
# 从extra字段中获取project_id
if not request.extra or 'project_id' not in request.extra:
raise HTTPException(status_code=400, detail="project_id is required in extra field")
# 从最外层获取zip_url参数
zip_url = request.zip_url
project_id = request.extra['project_id']
if not zip_url:
raise HTTPException(status_code=400, detail="zip_url is required")
# 验证项目访问权限
if not project_manager.validate_project_access(project_id):
raise HTTPException(status_code=404, detail=f"Project {project_id} not found or inactive")
# 获取项目数据目录
project_dir = project_manager.get_project_dir(project_id)
# 使用ZIP URL获取项目数据
print(f"从ZIP URL加载项目: {zip_url}")
project_dir = zip_handler.get_project_from_zip(zip_url)
if not project_dir:
raise HTTPException(status_code=400, detail=f"Failed to load project from ZIP URL: {zip_url}")
# 从实例池获取助手实例
agent = await get_agent_from_pool(timeout=30.0)
# 准备LLM配置从extra字段中移除project_id
llm_extra = request.extra.copy() if request.extra else {}
llm_extra.pop('project_id', None) # 移除project_id不传递给LLM
# 动态设置请求的模型支持从接口传入api_key和extra参数
update_agent_llm(agent, request.model, request.api_key, llm_extra)
# 动态设置请求的模型支持从接口传入api_key、model_server和extra参数
update_agent_llm(agent, request.model, request.api_key, request.model_server)
# 构建包含项目信息的消息上下文
messages = [
# 项目信息系统消息
{
"role": "user",
"content": f"当前项目ID: {project_id},数据目录: {project_dir}。所有文件路径中的 '[当前数据目录]' 请替换为: {project_dir}"
"content": f"当前项目来自ZIP URL: {zip_url},项目目录: {project_dir}。所有文件路径中的 '[当前数据目录]' 请替换为: {project_dir}"
},
# 用户消息批量转换
*[{"role": msg.role, "content": msg.content} for msg in request.messages]
@ -270,6 +267,16 @@ async def system_status():
}
@app.post("/system/cleanup-cache")
async def cleanup_cache():
    """Clear the ZIP download cache via the global ``zip_handler``.

    Returns a success message, or raises ``HTTPException`` with status 500
    when ``zip_handler.cleanup_cache()`` raises.
    """
    try:
        zip_handler.cleanup_cache()
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"缓存清理失败: {str(err)}")
    else:
        return {"message": "缓存清理成功"}
@app.on_event("startup")
async def startup_event():
"""应用启动时初始化助手实例池"""

View File

@ -139,22 +139,20 @@ def init_agent_service_universal():
return bot
def update_agent_llm(agent, model_name: str, api_key: str = None, extra: Dict = None):
def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None):
"""动态更新助手实例的LLM支持从接口传入参数"""
# 获取基础配置
llm_config = {
"model": model_name,
"api_key": api_key,
"model_server": model_server
}
# 如果接口传入了extra参数则合并到配置中
if extra is not None:
llm_config.update(extra)
# 创建LLM实例确保不是字典
if "llm_class" in llm_config:
llm_instance = llm_config.get("llm_class", TextChatAtOAI)(llm_config)
else:
#if "llm_class" in llm_config:
# llm_instance = llm_config.get("llm_class", TextChatAtOAI)(llm_config)
#else:
# 使用默认的 TextChatAtOAI 类
llm_instance = TextChatAtOAI(llm_config)

View File

@ -1,154 +0,0 @@
#!/usr/bin/env python3
"""
项目配置管理系统
负责管理项目ID到数据目录的映射以及项目访问权限控制
"""
import json
import os
from typing import Dict, Optional, List
from dataclasses import dataclass, asdict
@dataclass
class ProjectConfig:
    """Configuration record describing a single project."""

    # Unique identifier used as the registry key.
    project_id: str
    # Directory that holds the project's data files.
    data_dir: str
    # Display name of the project.
    name: str
    # Optional free-text description.
    description: str = ""
    # None is a sentinel meaning "use the default extension list"; it is
    # replaced in __post_init__ so every instance gets its own list object.
    allowed_file_types: Optional[List[str]] = None
    # Size limit in megabytes (not enforced anywhere in this class).
    max_file_size_mb: int = 100
    # Inactive projects are rejected by ProjectManager.validate_project_access.
    is_active: bool = True

    def __post_init__(self):
        """Fill in the default extension list when none was supplied."""
        if self.allowed_file_types is not None:
            return
        self.allowed_file_types = [".json", ".txt", ".csv", ".pdf"]
class ProjectManager:
    """Registry that maps project IDs to their configuration.

    The registry lives in memory (``self.projects``) and is persisted as JSON
    in ``config_file``. Every mutating method rewrites the whole file.
    """

    def __init__(self, config_file: str = "./projects/project_registry.json"):
        """Create the registry directory if needed and load existing projects.

        Args:
            config_file: Path of the JSON registry file.
        """
        self.config_file = config_file
        self.projects: Dict[str, "ProjectConfig"] = {}
        self._ensure_config_dir()
        self._load_projects()

    def _ensure_config_dir(self):
        """Make sure the directory that holds the registry file exists."""
        config_dir = os.path.dirname(self.config_file)
        # A bare filename has no directory part; os.makedirs("") would raise,
        # and the current directory needs no creation anyway.
        if config_dir:
            os.makedirs(config_dir, exist_ok=True)

    def _load_projects(self):
        """Load projects from the registry file, or create the default one.

        NOTE(review): when the file exists but cannot be parsed, the default
        configuration is written over it, so the unreadable content is lost.
        """
        if os.path.exists(self.config_file):
            try:
                with open(self.config_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                for project_data in data.get('projects', []):
                    config = ProjectConfig(**project_data)
                    self.projects[config.project_id] = config
            except Exception as e:
                print(f"加载项目配置失败: {e}")
                self._create_default_config()
        else:
            self._create_default_config()

    def _create_default_config(self):
        """Register the built-in "default" project and persist the registry."""
        default_project = ProjectConfig(
            project_id="default",
            data_dir="./data",
            name="默认项目",
            description="默认数据项目"
        )
        self.projects["default"] = default_project
        self._save_projects()

    def _save_projects(self):
        """Write all projects to the registry file.

        Best effort: a failure is printed and swallowed so that callers keep
        working with the in-memory registry.
        """
        data = {
            "projects": [asdict(project) for project in self.projects.values()]
        }
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"保存项目配置失败: {e}")

    def get_project(self, project_id: str) -> Optional["ProjectConfig"]:
        """Return the configuration for ``project_id``, or None if unknown."""
        return self.projects.get(project_id)

    def add_project(self, config: "ProjectConfig") -> bool:
        """Register a new project and create its data directory.

        A relative ``config.data_dir`` is rewritten in place to an absolute
        path before the directory is created.

        Returns:
            False if the project ID is already registered, True otherwise.
        """
        if config.project_id in self.projects:
            return False

        # Store an absolute path so the entry does not depend on the cwd.
        if not os.path.isabs(config.data_dir):
            config.data_dir = os.path.abspath(config.data_dir)
        os.makedirs(config.data_dir, exist_ok=True)

        self.projects[config.project_id] = config
        self._save_projects()
        return True

    def update_project(self, project_id: str, **kwargs) -> bool:
        """Update existing attributes of a project from keyword arguments.

        Keys that are not attributes of the stored configuration are ignored.

        Returns:
            False if the project is unknown, True otherwise.
        """
        if project_id not in self.projects:
            return False

        project = self.projects[project_id]
        for key, value in kwargs.items():
            if hasattr(project, key):
                setattr(project, key, value)

        self._save_projects()
        return True

    def delete_project(self, project_id: str) -> bool:
        """Remove a project from the registry (its data directory is kept).

        Returns:
            False if the project is unknown, True otherwise.
        """
        if project_id not in self.projects:
            return False

        del self.projects[project_id]
        self._save_projects()
        return True

    def list_projects(self) -> List["ProjectConfig"]:
        """Return all registered project configurations."""
        return list(self.projects.values())

    def get_project_dir(self, project_id: str) -> str:
        """Return the data directory of a project, creating it on first use.

        An unknown project is auto-registered with the directory
        ``./projects/{project_id}/data``.
        """
        project = self.get_project(project_id)
        if project:
            return project.data_dir

        # Unknown project: create the default directory layout.
        default_dir = f"./projects/{project_id}/data"
        os.makedirs(default_dir, exist_ok=True)

        # Register the new project automatically.
        new_project = ProjectConfig(
            project_id=project_id,
            data_dir=default_dir,
            name=f"项目 {project_id}",
            description=f"自动创建的项目 {project_id}"
        )
        self.add_project(new_project)
        # add_project stores an absolute path; return that stored value so the
        # first call and all later calls yield the same string.
        return new_project.data_dir

    def validate_project_access(self, project_id: str) -> bool:
        """Return True only if the project exists and is active."""
        project = self.get_project(project_id)
        # Explicit bool: `project and project.is_active` would return None
        # for an unknown project, contradicting the annotated return type.
        return project is not None and bool(project.is_active)
# Global project manager instance. Constructing it at import time runs
# ProjectManager.__init__ with the default registry path.
project_manager = ProjectManager()

Binary file not shown.

159
test_zip_feature.py Normal file
View File

@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
测试ZIP URL功能的脚本
"""
import requests
import json
import os
def test_missing_zip_url():
    """Check that a chat request without ``zip_url`` is answered with HTTP 400.

    Posts to the local API and prints the outcome. Nothing is returned and
    every exception is reported on stdout instead of being raised.
    """
    base_url = "http://localhost:8000"
    print("测试缺少zip_url参数...")

    # The payload deliberately leaves out the zip_url field.
    payload = {
        "messages": [{"role": "user", "content": "测试请求"}],
        "model": "qwen3-next",
        "stream": False,
    }
    endpoint = f"{base_url}/chat/completions"

    try:
        reply = requests.post(endpoint, json=payload, timeout=10)
        if reply.status_code != 400:
            print(f"❌ 预期400错误实际得到: {reply.status_code}")
            return
        print("✅ 正确返回400错误缺少zip_url")
        print(f"错误信息: {reply.json()}")
    except Exception as exc:
        print(f"❌ 测试失败: {exc}")
def test_zip_project_feature():
    """Send a chat request that carries ``zip_url`` and print the response.

    The URL and model server in the payload are placeholders that must be
    replaced with real values before the request can succeed.
    """
    # Base URL of the API under test.
    base_url = "http://localhost:8000"
    print("测试ZIP项目功能...")

    user_message = {"role": "user", "content": "请列出项目目录中的文件"}
    payload = {
        "messages": [user_message],
        "model": "qwen3-next",
        "model_server": "https://openrouter.ai/api/v1",  # sample model_server
        "zip_url": "https://example.com/test-project.zip",  # sample URL, replace it
        "stream": False,
    }

    try:
        print("发送测试请求...")
        reply = requests.post(
            f"{base_url}/chat/completions", json=payload, timeout=30
        )
        print(f"响应状态码: {reply.status_code}")

        if reply.status_code != 200:
            print("❌ 请求失败")
            print(f"错误信息: {reply.text}")
        else:
            print("✅ 请求成功")
            body = reply.json()
            print(f"响应内容: {json.dumps(body, indent=2, ensure_ascii=False)}")
    except requests.exceptions.ConnectionError:
        print("❌ 连接失败请确保API服务正在运行")
    except Exception as exc:
        print(f"❌ 测试失败: {exc}")
def test_cache_cleanup():
    """Call ``POST /system/cleanup-cache`` on the local API and print the result.

    Nothing is returned; every exception is reported on stdout.
    """
    base_url = "http://localhost:8000"

    try:
        print("测试缓存清理...")
        # requests has no default timeout; without one an unresponsive
        # server would block this script forever.
        response = requests.post(f"{base_url}/system/cleanup-cache", timeout=10)

        if response.status_code == 200:
            print("✅ 缓存清理成功")
            print(f"响应: {response.json()}")
        else:
            print("❌ 缓存清理失败")
            print(f"错误信息: {response.text}")
    except Exception as e:
        print(f"❌ 缓存清理测试失败: {e}")
def test_system_status():
    """Call ``GET /system/status`` on the local API and print the result.

    Nothing is returned; every exception is reported on stdout.
    """
    base_url = "http://localhost:8000"

    try:
        print("获取系统状态...")
        # requests has no default timeout; without one an unresponsive
        # server would block this script forever.
        response = requests.get(f"{base_url}/system/status", timeout=10)

        if response.status_code == 200:
            print("✅ 系统状态获取成功")
            status = response.json()
            print(f"系统状态: {json.dumps(status, indent=2, ensure_ascii=False)}")
        else:
            print("❌ 系统状态获取失败")
    except Exception as e:
        print(f"❌ 系统状态测试失败: {e}")
if __name__ == "__main__":
    print("=== ZIP项目功能测试 ===")

    # Run order: system status, missing zip_url handling, ZIP project
    # loading, cache cleanup.
    scenarios = (
        test_system_status,
        test_missing_zip_url,
        test_zip_project_feature,
        test_cache_cleanup,
    )
    for index, scenario in enumerate(scenarios):
        if index > 0:
            print()  # blank line between two scenarios
        scenario()

    print("\n=== 测试完成 ===")

    # Usage notes shown after the run.
    usage_lines = (
        "\n使用说明:",
        "1. 确保API服务正在运行: python fastapi_app.py",
        "2. 将test_request中的zip_url替换为实际的ZIP文件URL",
        "3. 将model_server替换为实际的模型服务器地址",
        "4. 运行此脚本进行测试",
        "5. 可以通过POST /system/cleanup-cache清理缓存",
        "\n注意现在model_server和zip_url参数都放在最外层不再需要extra字段",
    )
    for line in usage_lines:
        print(line)

118
zip_project_handler.py Normal file
View File

@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
ZIP项目处理器
负责处理从URL下载ZIP文件并解压到项目目录的功能
"""
import os
import hashlib
import zipfile
import requests
import tempfile
from typing import Optional
from urllib.parse import urlparse
from pathlib import Path
class ZipProjectHandler:
    """Download a ZIP archive from a URL and unpack it into a project directory.

    Layout below ``projects_dir``:
        _cache/{url_hash}.zip   downloaded archives
        {url_hash}/             extracted project directories

    ``url_hash`` is the first 16 hex characters of the MD5 of the URL. An
    existing project directory is treated as a cache hit, so failed downloads
    and failed extractions must not leave files behind.
    """

    def __init__(self, projects_dir: str = "./projects"):
        """Create the projects directory and its ``_cache`` subdirectory.

        Args:
            projects_dir: Root directory for extracted projects and the cache.
        """
        self.projects_dir = Path(projects_dir).resolve()
        # parents=True so a nested projects_dir works on a fresh checkout.
        self.projects_dir.mkdir(parents=True, exist_ok=True)
        self.cache_dir = self.projects_dir / "_cache"
        self.cache_dir.mkdir(exist_ok=True)

    def _get_url_hash(self, url: str) -> str:
        """Return the cache key for ``url`` (first 16 hex chars of its MD5).

        The hash is only a cache key, not a security measure.
        """
        return hashlib.md5(url.encode('utf-8')).hexdigest()[:16]

    def _is_valid_url(self, url: str) -> bool:
        """Return True if ``url`` is an http(s) URL with a network location.

        Other schemes are rejected because the URL comes from the request.
        """
        try:
            result = urlparse(url)
        except (ValueError, TypeError, AttributeError):
            return False
        return result.scheme in ("http", "https") and bool(result.netloc)

    def _download_file(self, url: str, local_path: str) -> bool:
        """Stream ``url`` to ``local_path``.

        The data is written to a temporary file in the same directory and
        moved into place only after the download has completed, so an
        interrupted transfer never leaves a truncated file at ``local_path``.

        Returns:
            True on success, False on any failure (the error is printed).
        """
        destination = Path(local_path)
        partial_path = None
        try:
            fd, partial_name = tempfile.mkstemp(
                prefix=f"{destination.name}.",
                suffix=".part",
                dir=str(destination.parent),
            )
            partial_path = Path(partial_name)
            # The context manager closes the streamed response and releases
            # the connection even when the transfer fails midway.
            with os.fdopen(fd, 'wb') as f, \
                    requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
            os.replace(str(partial_path), str(destination))
            return True
        except Exception as e:
            print(f"下载文件失败: {e}")
            if partial_path is not None:
                try:
                    partial_path.unlink()
                except OSError:
                    pass
            return False

    def _extract_zip(self, zip_path: str, extract_to: str) -> bool:
        """Extract ``zip_path`` into ``extract_to``.

        If extraction fails and ``extract_to`` did not exist beforehand, the
        partially written directory is removed again.

        Returns:
            True on success, False on any failure (the error is printed).
        """
        import shutil

        existed_before = os.path.exists(extract_to)
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                # NOTE(review): nothing limits the uncompressed size, so a
                # ZIP bomb from an untrusted URL can fill the disk.
                zip_ref.extractall(extract_to)
            return True
        except Exception as e:
            print(f"解压ZIP文件失败: {e}")
            if not existed_before:
                shutil.rmtree(extract_to, ignore_errors=True)
            return False

    def get_project_from_zip(self, zip_url: str) -> Optional[str]:
        """Return the project directory for ``zip_url``, preparing it if needed.

        Args:
            zip_url: URL of the ZIP file.

        Returns:
            The project directory path on success, None if the URL is
            invalid or the download or extraction fails.
        """
        if not self._is_valid_url(zip_url):
            print(f"无效的URL: {zip_url}")
            return None

        # An existing project directory is a cache hit.
        url_hash = self._get_url_hash(zip_url)
        cached_project_dir = self.projects_dir / url_hash

        if cached_project_dir.exists():
            print(f"使用缓存的项目目录: {cached_project_dir}")
            return str(cached_project_dir)

        # Download the archive unless it is already cached.
        zip_filename = f"{url_hash}.zip"
        zip_path = self.cache_dir / zip_filename

        if not zip_path.exists():
            print(f"下载ZIP文件: {zip_url}")
            # The cache directory may be missing if a cleanup was interrupted.
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            if not self._download_file(zip_url, str(zip_path)):
                return None
        else:
            print(f"使用缓存的ZIP文件: {zip_path}")

        # Extract into the project directory.
        print(f"解压ZIP文件到: {cached_project_dir}")
        if not self._extract_zip(str(zip_path), str(cached_project_dir)):
            # Drop the archive so a corrupt cache entry is downloaded again
            # instead of failing every later request for this URL.
            try:
                zip_path.unlink()
            except OSError:
                pass
            return None

        print(f"项目准备完成: {cached_project_dir}")
        return str(cached_project_dir)

    def cleanup_cache(self):
        """Delete all cached ZIP files and recreate the empty cache directory.

        Extracted project directories are not touched. Failures are printed
        and swallowed, so callers never see an exception from this method.
        """
        try:
            import shutil
            if self.cache_dir.exists():
                shutil.rmtree(self.cache_dir)
            self.cache_dir.mkdir(exist_ok=True)
            print("缓存清理完成")
        except Exception as e:
            print(f"清理缓存失败: {e}")
# Global ZIP project handler instance, created at import time with the
# default "./projects" directory.
zip_handler = ZipProjectHandler()