remove process file
This commit is contained in:
parent
1f81bef8c6
commit
213ed20502
@ -278,7 +278,6 @@ curl -X POST "http://localhost:8001/api/v1/tasks/cleanup?older_than_days=7"
|
||||
- `POST /api/v1/chat/completions` - OpenAI 兼容的聊天接口
|
||||
|
||||
### 文件处理接口
|
||||
- `POST /api/v1/files/process` - 同步文件处理
|
||||
- `POST /api/v1/files/process/async` - 异步文件处理
|
||||
- `GET /api/v1/files/{unique_id}/status` - 文件处理状态
|
||||
|
||||
|
||||
100
fastapi_app.py
100
fastapi_app.py
@ -16,8 +16,7 @@ from pydantic import BaseModel, Field
|
||||
# Import utility modules
|
||||
from utils import (
|
||||
# Models
|
||||
Message, DatasetRequest, ChatRequest, FileProcessRequest,
|
||||
FileProcessResponse, ChatResponse, QueueTaskRequest, QueueTaskResponse,
|
||||
Message, DatasetRequest, ChatRequest, ChatResponse, QueueTaskRequest, QueueTaskResponse,
|
||||
QueueStatusResponse, TaskStatusResponse,
|
||||
|
||||
# File utilities
|
||||
@ -178,103 +177,6 @@ async def generate_stream_response(agent, messages, request) -> AsyncGenerator[s
|
||||
# Models are now imported from utils module
|
||||
|
||||
|
||||
@app.post("/api/v1/files/process")
|
||||
async def process_files(request: FileProcessRequest, authorization: Optional[str] = Header(None)):
|
||||
"""
|
||||
Process dataset files for a given unique_id.
|
||||
Files are organized by key groups, and each group is combined into a single document.txt file.
|
||||
Supports zip files which will be extracted and their txt/md contents combined.
|
||||
|
||||
Args:
|
||||
request: FileProcessRequest containing unique_id, files (key-grouped dict), system_prompt, and mcp_settings
|
||||
authorization: Authorization header containing API key (Bearer <API_KEY>)
|
||||
|
||||
Returns:
|
||||
FileProcessResponse: Processing result with file list
|
||||
"""
|
||||
try:
|
||||
unique_id = request.unique_id
|
||||
if not unique_id:
|
||||
raise HTTPException(status_code=400, detail="unique_id is required")
|
||||
|
||||
# 处理文件:使用按key分组格式
|
||||
processed_files_by_key = {}
|
||||
if request.files:
|
||||
# 使用请求中的文件(按key分组)
|
||||
processed_files_by_key = await download_dataset_files(unique_id, request.files)
|
||||
total_files = sum(len(files) for files in processed_files_by_key.values())
|
||||
print(f"Processed {total_files} dataset files across {len(processed_files_by_key)} keys for unique_id: {unique_id}")
|
||||
else:
|
||||
print(f"No files provided in request for unique_id: {unique_id}")
|
||||
|
||||
# 使用unique_id获取项目目录
|
||||
project_dir = os.path.join("projects", unique_id)
|
||||
if not os.path.exists(project_dir):
|
||||
raise HTTPException(status_code=400, detail=f"Project directory not found for unique_id: {unique_id}")
|
||||
|
||||
# 收集项目目录下所有的 document.txt 文件
|
||||
document_files = []
|
||||
for root, dirs, files in os.walk(project_dir):
|
||||
for file in files:
|
||||
if file == "document.txt":
|
||||
document_files.append(os.path.join(root, file))
|
||||
|
||||
# 合并所有处理的文件(包含新按key分组的文件)
|
||||
all_files = document_files.copy()
|
||||
for key, files in processed_files_by_key.items():
|
||||
all_files.extend(files)
|
||||
|
||||
if not all_files:
|
||||
print(f"警告: 项目目录 {project_dir} 中未找到任何 document.txt 文件")
|
||||
|
||||
# 保存system_prompt和mcp_settings到项目目录(如果提供)
|
||||
if request.system_prompt:
|
||||
system_prompt_file = os.path.join(project_dir, "system_prompt.md")
|
||||
with open(system_prompt_file, 'w', encoding='utf-8') as f:
|
||||
f.write(request.system_prompt)
|
||||
print(f"Saved system_prompt for unique_id: {unique_id}")
|
||||
|
||||
if request.mcp_settings:
|
||||
mcp_settings_file = os.path.join(project_dir, "mcp_settings.json")
|
||||
with open(mcp_settings_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(request.mcp_settings, f, ensure_ascii=False, indent=2)
|
||||
print(f"Saved mcp_settings for unique_id: {unique_id}")
|
||||
|
||||
# 生成项目README.md文件
|
||||
try:
|
||||
save_project_readme(unique_id)
|
||||
print(f"Generated README.md for unique_id: {unique_id}")
|
||||
except Exception as e:
|
||||
print(f"Failed to generate README.md for unique_id: {unique_id}, error: {str(e)}")
|
||||
# 不影响主要处理流程,继续执行
|
||||
|
||||
# 返回结果包含按key分组的文件信息
|
||||
result_files = []
|
||||
for key in processed_files_by_key.keys():
|
||||
# 添加对应的dataset document.txt路径
|
||||
document_path = os.path.join("projects", unique_id, "dataset", key, "document.txt")
|
||||
if os.path.exists(document_path):
|
||||
result_files.append(document_path)
|
||||
|
||||
# 对于没有在processed_files_by_key中但存在的document.txt文件,也添加到结果中
|
||||
existing_document_paths = set(result_files) # 避免重复
|
||||
for doc_file in document_files:
|
||||
if doc_file not in existing_document_paths:
|
||||
result_files.append(doc_file)
|
||||
|
||||
return FileProcessResponse(
|
||||
success=True,
|
||||
message=f"Successfully processed {len(result_files)} document files across {len(processed_files_by_key)} keys",
|
||||
unique_id=unique_id,
|
||||
processed_files=result_files
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error processing files: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
|
||||
@app.post("/api/v1/files/process/async")
|
||||
async def process_files_async_endpoint(request: QueueTaskRequest, authorization: Optional[str] = Header(None)):
|
||||
|
||||
@ -303,7 +303,7 @@ def _process_single_file(
|
||||
file_ext = os.path.splitext(original_filename)[1].lower()
|
||||
|
||||
# 根据文件类型进行不同处理
|
||||
supported_extensions = ['.txt', '.md', '.pdf', '.doc', '.docx', '.zip']
|
||||
supported_extensions = ['.txt', '.md', '.csv', '.xlsx', '.zip']
|
||||
|
||||
if file_ext not in supported_extensions:
|
||||
return {
|
||||
@ -353,4 +353,4 @@ def daily_cleanup():
|
||||
"""每日清理任务"""
|
||||
print("执行每日清理任务")
|
||||
# 这里可以添加清理逻辑
|
||||
return {"status": "completed", "message": "每日清理任务完成"}
|
||||
return {"status": "completed", "message": "每日清理任务完成"}
|
||||
|
||||
@ -56,11 +56,9 @@ from .api_models import (
|
||||
Message,
|
||||
DatasetRequest,
|
||||
ChatRequest,
|
||||
FileProcessRequest,
|
||||
DatasetResponse,
|
||||
ChatCompletionResponse,
|
||||
ChatResponse,
|
||||
FileProcessResponse,
|
||||
ErrorResponse,
|
||||
HealthCheckResponse,
|
||||
SystemStatusResponse,
|
||||
@ -131,11 +129,9 @@ __all__ = [
|
||||
'Message',
|
||||
'DatasetRequest',
|
||||
'ChatRequest',
|
||||
'FileProcessRequest',
|
||||
'DatasetResponse',
|
||||
'ChatCompletionResponse',
|
||||
'ChatResponse',
|
||||
'FileProcessResponse',
|
||||
'ErrorResponse',
|
||||
'HealthCheckResponse',
|
||||
'SystemStatusResponse',
|
||||
|
||||
Loading…
Reference in New Issue
Block a user