From 213ed20502996ce312951d7c48b81c39521b5348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Sun, 19 Oct 2025 22:12:27 +0800 Subject: [PATCH] remove process file --- README.md | 1 - fastapi_app.py | 100 +------------------------------------------- task_queue/tasks.py | 4 +- utils/__init__.py | 4 -- 4 files changed, 3 insertions(+), 106 deletions(-) diff --git a/README.md b/README.md index 29e5a56..3f4d654 100644 --- a/README.md +++ b/README.md @@ -278,7 +278,6 @@ curl -X POST "http://localhost:8001/api/v1/tasks/cleanup?older_than_days=7" - `POST /api/v1/chat/completions` - OpenAI 兼容的聊天接口 ### 文件处理接口 -- `POST /api/v1/files/process` - 同步文件处理 - `POST /api/v1/files/process/async` - 异步文件处理 - `GET /api/v1/files/{unique_id}/status` - 文件处理状态 diff --git a/fastapi_app.py b/fastapi_app.py index f8fe08d..d7f3f79 100644 --- a/fastapi_app.py +++ b/fastapi_app.py @@ -16,8 +16,7 @@ from pydantic import BaseModel, Field # Import utility modules from utils import ( # Models - Message, DatasetRequest, ChatRequest, FileProcessRequest, - FileProcessResponse, ChatResponse, QueueTaskRequest, QueueTaskResponse, + Message, DatasetRequest, ChatRequest, ChatResponse, QueueTaskRequest, QueueTaskResponse, QueueStatusResponse, TaskStatusResponse, # File utilities @@ -178,103 +177,6 @@ async def generate_stream_response(agent, messages, request) -> AsyncGenerator[s # Models are now imported from utils module -@app.post("/api/v1/files/process") -async def process_files(request: FileProcessRequest, authorization: Optional[str] = Header(None)): - """ - Process dataset files for a given unique_id. - Files are organized by key groups, and each group is combined into a single document.txt file. - Supports zip files which will be extracted and their txt/md contents combined. - - Args: - request: FileProcessRequest containing unique_id, files (key-grouped dict), system_prompt, and mcp_settings - authorization: Authorization header containing API key (Bearer ) - - Returns: - FileProcessResponse: Processing result with file list - """ - try: - unique_id = request.unique_id - if not unique_id: - raise HTTPException(status_code=400, detail="unique_id is required") - - # 处理文件:使用按key分组格式 - processed_files_by_key = {} - if request.files: - # 使用请求中的文件(按key分组) - processed_files_by_key = await download_dataset_files(unique_id, request.files) - total_files = sum(len(files) for files in processed_files_by_key.values()) - print(f"Processed {total_files} dataset files across {len(processed_files_by_key)} keys for unique_id: {unique_id}") - else: - print(f"No files provided in request for unique_id: {unique_id}") - - # 使用unique_id获取项目目录 - project_dir = os.path.join("projects", unique_id) - if not os.path.exists(project_dir): - raise HTTPException(status_code=400, detail=f"Project directory not found for unique_id: {unique_id}") - - # 收集项目目录下所有的 document.txt 文件 - document_files = [] - for root, dirs, files in os.walk(project_dir): - for file in files: - if file == "document.txt": - document_files.append(os.path.join(root, file)) - - # 合并所有处理的文件(包含新按key分组的文件) - all_files = document_files.copy() - for key, files in processed_files_by_key.items(): - all_files.extend(files) - - if not all_files: - print(f"警告: 项目目录 {project_dir} 中未找到任何 document.txt 文件") - - # 保存system_prompt和mcp_settings到项目目录(如果提供) - if request.system_prompt: - system_prompt_file = os.path.join(project_dir, "system_prompt.md") - with open(system_prompt_file, 'w', encoding='utf-8') as f: - f.write(request.system_prompt) - print(f"Saved system_prompt for unique_id: {unique_id}") - - if request.mcp_settings: - mcp_settings_file = os.path.join(project_dir, "mcp_settings.json") - with open(mcp_settings_file, 'w', encoding='utf-8') as f: - json.dump(request.mcp_settings, f, ensure_ascii=False, indent=2) - print(f"Saved mcp_settings for unique_id: {unique_id}") - - # 生成项目README.md文件 - try: - save_project_readme(unique_id) - print(f"Generated README.md for unique_id: {unique_id}") - except Exception as e: - print(f"Failed to generate README.md for unique_id: {unique_id}, error: {str(e)}") - # 不影响主要处理流程,继续执行 - - # 返回结果包含按key分组的文件信息 - result_files = [] - for key in processed_files_by_key.keys(): - # 添加对应的dataset document.txt路径 - document_path = os.path.join("projects", unique_id, "dataset", key, "document.txt") - if os.path.exists(document_path): - result_files.append(document_path) - - # 对于没有在processed_files_by_key中但存在的document.txt文件,也添加到结果中 - existing_document_paths = set(result_files) # 避免重复 - for doc_file in document_files: - if doc_file not in existing_document_paths: - result_files.append(doc_file) - - return FileProcessResponse( - success=True, - message=f"Successfully processed {len(result_files)} document files across {len(processed_files_by_key)} keys", - unique_id=unique_id, - processed_files=result_files - ) - - except HTTPException: - raise - except Exception as e: - print(f"Error processing files: {str(e)}") - raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") - @app.post("/api/v1/files/process/async") async def process_files_async_endpoint(request: QueueTaskRequest, authorization: Optional[str] = Header(None)): diff --git a/task_queue/tasks.py b/task_queue/tasks.py index 8c35ef9..10a2afd 100644 --- a/task_queue/tasks.py +++ b/task_queue/tasks.py @@ -303,7 +303,7 @@ def _process_single_file( file_ext = os.path.splitext(original_filename)[1].lower() # 根据文件类型进行不同处理 - supported_extensions = ['.txt', '.md', '.pdf', '.doc', '.docx', '.zip'] + supported_extensions = ['.txt', '.md', '.csv', '.xlsx', '.zip'] if file_ext not in supported_extensions: return { @@ -353,4 +353,4 @@ def daily_cleanup(): """每日清理任务""" print("执行每日清理任务") # 这里可以添加清理逻辑 - return {"status": "completed", "message": "每日清理任务完成"} \ No newline at end of file + return {"status": "completed", "message": "每日清理任务完成"} diff --git a/utils/__init__.py b/utils/__init__.py index e2e0c18..6bb64f0 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -56,11 +56,9 @@ from .api_models import ( Message, DatasetRequest, ChatRequest, - FileProcessRequest, DatasetResponse, ChatCompletionResponse, ChatResponse, - FileProcessResponse, ErrorResponse, HealthCheckResponse, SystemStatusResponse, @@ -131,11 +129,9 @@ __all__ = [ 'Message', 'DatasetRequest', 'ChatRequest', - 'FileProcessRequest', 'DatasetResponse', 'ChatCompletionResponse', 'ChatResponse', - 'FileProcessResponse', 'ErrorResponse', 'HealthCheckResponse', 'SystemStatusResponse',