remove process file

parent 1f81bef8c6
commit 213ed20502
@@ -278,7 +278,6 @@ curl -X POST "http://localhost:8001/api/v1/tasks/cleanup?older_than_days=7"
 - `POST /api/v1/chat/completions` - OpenAI-compatible chat completions endpoint
 
 ### File processing endpoints
-- `POST /api/v1/files/process` - synchronous file processing
 - `POST /api/v1/files/process/async` - asynchronous file processing
 - `GET /api/v1/files/{unique_id}/status` - file processing status
 
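As a usage aside: after this commit only the async route and the status route remain in the endpoint list above. Below is a minimal client sketch of that surviving flow. Only the paths and the Bearer auth convention come from this diff; the host/port mirror the README's curl examples, and the payload fields (`unique_id`, `files`) are borrowed from the removed handler's docstring further down — the actual `QueueTaskRequest` schema is not shown here, so treat the body as an assumption.

```python
# Hypothetical client for the two surviving routes; payload fields are assumptions.
import requests

BASE = "http://localhost:8001"  # host/port as in the README's curl examples
HEADERS = {"Authorization": "Bearer <API_KEY>"}

# Enqueue asynchronous file processing (QueueTaskRequest fields assumed)
task = requests.post(
    f"{BASE}/api/v1/files/process/async",
    json={"unique_id": "demo", "files": {"docs": ["https://example.com/a.txt"]}},
    headers=HEADERS,
).json()

# Poll processing status for the same unique_id
status = requests.get(f"{BASE}/api/v1/files/demo/status", headers=HEADERS).json()
print(task, status)
```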
fastapi_app.py
@@ -16,8 +16,7 @@ from pydantic import BaseModel, Field
 # Import utility modules
 from utils import (
     # Models
-    Message, DatasetRequest, ChatRequest, FileProcessRequest,
-    FileProcessResponse, ChatResponse, QueueTaskRequest, QueueTaskResponse,
+    Message, DatasetRequest, ChatRequest, ChatResponse, QueueTaskRequest, QueueTaskResponse,
     QueueStatusResponse, TaskStatusResponse,
 
     # File utilities
@@ -178,103 +177,6 @@ async def generate_stream_response(agent, messages, request) -> AsyncGenerator[s
 # Models are now imported from utils module
 
 
-@app.post("/api/v1/files/process")
-async def process_files(request: FileProcessRequest, authorization: Optional[str] = Header(None)):
-    """
-    Process dataset files for a given unique_id.
-    Files are organized by key groups, and each group is combined into a single document.txt file.
-    Supports zip files which will be extracted and their txt/md contents combined.
-
-    Args:
-        request: FileProcessRequest containing unique_id, files (key-grouped dict), system_prompt, and mcp_settings
-        authorization: Authorization header containing API key (Bearer <API_KEY>)
-
-    Returns:
-        FileProcessResponse: Processing result with file list
-    """
-    try:
-        unique_id = request.unique_id
-        if not unique_id:
-            raise HTTPException(status_code=400, detail="unique_id is required")
-
-        # Process files using the key-grouped format
-        processed_files_by_key = {}
-        if request.files:
-            # Use the files from the request (grouped by key)
-            processed_files_by_key = await download_dataset_files(unique_id, request.files)
-            total_files = sum(len(files) for files in processed_files_by_key.values())
-            print(f"Processed {total_files} dataset files across {len(processed_files_by_key)} keys for unique_id: {unique_id}")
-        else:
-            print(f"No files provided in request for unique_id: {unique_id}")
-
-        # Resolve the project directory from unique_id
-        project_dir = os.path.join("projects", unique_id)
-        if not os.path.exists(project_dir):
-            raise HTTPException(status_code=400, detail=f"Project directory not found for unique_id: {unique_id}")
-
-        # Collect all document.txt files under the project directory
-        document_files = []
-        for root, dirs, files in os.walk(project_dir):
-            for file in files:
-                if file == "document.txt":
-                    document_files.append(os.path.join(root, file))
-
-        # Merge all processed files (including the new key-grouped files)
-        all_files = document_files.copy()
-        for key, files in processed_files_by_key.items():
-            all_files.extend(files)
-
-        if not all_files:
-            print(f"Warning: no document.txt files found in project directory {project_dir}")
-
-        # Save system_prompt and mcp_settings to the project directory (if provided)
-        if request.system_prompt:
-            system_prompt_file = os.path.join(project_dir, "system_prompt.md")
-            with open(system_prompt_file, 'w', encoding='utf-8') as f:
-                f.write(request.system_prompt)
-            print(f"Saved system_prompt for unique_id: {unique_id}")
-
-        if request.mcp_settings:
-            mcp_settings_file = os.path.join(project_dir, "mcp_settings.json")
-            with open(mcp_settings_file, 'w', encoding='utf-8') as f:
-                json.dump(request.mcp_settings, f, ensure_ascii=False, indent=2)
-            print(f"Saved mcp_settings for unique_id: {unique_id}")
-
-        # Generate the project README.md
-        try:
-            save_project_readme(unique_id)
-            print(f"Generated README.md for unique_id: {unique_id}")
-        except Exception as e:
-            print(f"Failed to generate README.md for unique_id: {unique_id}, error: {str(e)}")
-            # Do not let this break the main processing flow; continue
-
-        # The result includes the file info grouped by key
-        result_files = []
-        for key in processed_files_by_key.keys():
-            # Add the corresponding dataset document.txt path
-            document_path = os.path.join("projects", unique_id, "dataset", key, "document.txt")
-            if os.path.exists(document_path):
-                result_files.append(document_path)
-
-        # Also include existing document.txt files that are not in processed_files_by_key
-        existing_document_paths = set(result_files)  # avoid duplicates
-        for doc_file in document_files:
-            if doc_file not in existing_document_paths:
-                result_files.append(doc_file)
-
-        return FileProcessResponse(
-            success=True,
-            message=f"Successfully processed {len(result_files)} document files across {len(processed_files_by_key)} keys",
-            unique_id=unique_id,
-            processed_files=result_files
-        )
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        print(f"Error processing files: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
-
-
 
 @app.post("/api/v1/files/process/async")
 async def process_files_async_endpoint(request: QueueTaskRequest, authorization: Optional[str] = Header(None)):
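Worth noting before the handler disappears: it encodes an on-disk convention in which each key of the request's `files` mapping yields one combined `projects/<unique_id>/dataset/<key>/document.txt`. A small sketch of just that path logic, with hypothetical key names (`specs`, `notes`) that are not taken from the diff:

```python
import os

# Mirror of the path convention in the removed process_files handler:
# one combined document.txt per key group under the project directory.
def expected_document_paths(unique_id: str, files_by_key: dict) -> list:
    return [
        os.path.join("projects", unique_id, "dataset", key, "document.txt")
        for key in files_by_key
    ]

# Hypothetical key groups, for illustration only
print(expected_document_paths("demo", {"specs": [], "notes": []}))
# -> ['projects/demo/dataset/specs/document.txt', 'projects/demo/dataset/notes/document.txt']
```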
@@ -303,7 +303,7 @@ def _process_single_file(
     file_ext = os.path.splitext(original_filename)[1].lower()
 
     # Handle each file type differently
-    supported_extensions = ['.txt', '.md', '.pdf', '.doc', '.docx', '.zip']
+    supported_extensions = ['.txt', '.md', '.csv', '.xlsx', '.zip']
 
     if file_ext not in supported_extensions:
         return {
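The practical effect of this one-line change: `.pdf`, `.doc`, and `.docx` uploads are no longer accepted, while `.csv` and `.xlsx` now are. A standalone sketch of the gate as it behaves after the commit; the function and constant names here are illustrative, not the repo's:

```python
import os

# New whitelist introduced by this commit
SUPPORTED_EXTENSIONS = {'.txt', '.md', '.csv', '.xlsx', '.zip'}

def is_supported(original_filename: str) -> bool:
    # Same normalization as _process_single_file: take the extension, lowercase it
    file_ext = os.path.splitext(original_filename)[1].lower()
    return file_ext in SUPPORTED_EXTENSIONS

assert is_supported("Report.XLSX")      # newly accepted
assert not is_supported("report.pdf")   # dropped by this commit
```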
@@ -353,4 +353,4 @@ def daily_cleanup():
     """Daily cleanup task"""
     print("Running the daily cleanup task")
     # Cleanup logic can be added here
     return {"status": "completed", "message": "Daily cleanup task completed"}
@@ -56,11 +56,9 @@ from .api_models import (
     Message,
     DatasetRequest,
     ChatRequest,
-    FileProcessRequest,
     DatasetResponse,
     ChatCompletionResponse,
     ChatResponse,
-    FileProcessResponse,
     ErrorResponse,
     HealthCheckResponse,
     SystemStatusResponse,
@@ -131,11 +129,9 @@ __all__ = [
     'Message',
     'DatasetRequest',
     'ChatRequest',
-    'FileProcessRequest',
     'DatasetResponse',
     'ChatCompletionResponse',
     'ChatResponse',
-    'FileProcessResponse',
     'ErrorResponse',
     'HealthCheckResponse',
     'SystemStatusResponse',
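These two hunks keep the package's re-imports and its `__all__` list in step, which matters: a name left in `__all__` after its import is removed makes a star-import fail at runtime. A self-contained simulation of that failure mode (the `pkg` module here is synthetic, not this repo's utils package):

```python
import types

# Build a fake package module whose __all__ still lists a removed name.
pkg = types.ModuleType("pkg")
pkg.Message = object                              # still re-exported
pkg.__all__ = ["Message", "FileProcessRequest"]   # stale entry left behind

# What `from pkg import *` does under the hood:
try:
    ns = {name: getattr(pkg, name) for name in pkg.__all__}
except AttributeError as e:
    print("star-import would fail:", e)
```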