qwen_agent/routes/projects.py
朱潮 77079539c1 refactor: remove file-parsing knowledge-base pipeline and Huey queue
The local file-parsing pipeline (upload -> Huey async parse -> generate
projects/data/.../document.txt) is no longer needed: RAG retrieval runs
against the backend vector store and does not read the local parse output,
so removing this has zero impact on existing bot Q&A.

- Delete task_queue/ (Huey queue, consumer, tasks, task status store)
- Delete parsing utils: dataset_manager, single_file_processor,
  data_merger, project_manager
- Delete db_manager.py (only managed task_status.db)
- routes/files.py: keep only POST /api/v1/upload; drop all
  parse/queue/task endpoints
- routes/projects.py: drop /tasks endpoint and task_status import
- utils/__init__.py & api_models.py: remove exports/models for deleted
  modules and queue task models
- start_unified.py & start_all_optimized.sh: no longer launch the
  queue consumer
- Drop huey dependency (keep redis)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-08 19:40:58 +08:00

156 lines
6.4 KiB
Python

import os
import json
from typing import Optional
from fastapi import APIRouter, HTTPException
import logging
# Module logger; all messages from these routes are emitted under the 'app' logger name.
logger = logging.getLogger('app')
# Router on which the project-listing endpoints in this module are registered
# through the @router.get decorators.
router = APIRouter()
def _iter_subdirs(base_dir: str):
    """Yield ``(name, path)`` for every immediate sub-directory of *base_dir*.

    Yields nothing when *base_dir* is missing or is not a directory.
    """
    if not os.path.isdir(base_dir):
        return
    for entry in os.listdir(base_dir):
        entry_path = os.path.join(base_dir, entry)
        if os.path.isdir(entry_path):
            yield entry, entry_path


def _load_json_if_present(path: str):
    """Return the parsed JSON content of *path*, or ``{}`` if the file is absent."""
    if not os.path.exists(path):
        return {}
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _count_files(directory: str, skip_suffix: Optional[str] = None) -> int:
    """Count files below *directory* recursively.

    Files whose name ends with *skip_suffix* are left out when it is given.
    ``os.walk`` yields nothing for a missing directory, so the result is 0
    in that case.
    """
    total = 0
    for _root, _dirs, filenames in os.walk(directory):
        if skip_suffix is None:
            total += len(filenames)
        else:
            total += sum(1 for name in filenames if not name.endswith(skip_suffix))
    return total


def _fallback_record(item: str, item_path: str, name: str, kind: str, detail_key: str) -> dict:
    """Build the placeholder record used when an entry could not be read.

    The record has the same keys as a normal one (including an empty
    *detail_key* mapping) so consumers can index it uniformly.  Timestamps
    fall back to ``None`` when the directory cannot be stat'ed, so one broken
    entry never aborts the whole listing.
    """
    try:
        created_at = os.path.getctime(item_path)
        updated_at = os.path.getmtime(item_path)
    except OSError:
        created_at = updated_at = None
    return {
        "id": item,
        "name": name,
        "type": kind,
        "status": "unknown",
        "file_count": 0,
        detail_key: {},
        "created_at": created_at,
        "updated_at": updated_at,
    }


def _robot_project_record(item: str, item_path: str) -> dict:
    """Describe one directory under ``projects/robot``."""
    try:
        config_data = _load_json_if_present(os.path.join(item_path, "robot_config.json"))
        return {
            "id": item,
            "name": config_data.get("name", item),
            "type": "robot",
            "status": config_data.get("status", "active"),
            # Only files below the project's "datasets" folder are counted.
            "file_count": _count_files(os.path.join(item_path, "datasets")),
            "config": config_data,
            "created_at": os.path.getctime(item_path),
            "updated_at": os.path.getmtime(item_path),
        }
    except Exception as e:
        # Best effort: an unreadable project is still listed, flagged "unknown".
        logger.error(f"Error reading robot project {item}: {str(e)}")
        return _fallback_record(item, item_path, item, "robot", "config")


def _dataset_record(item: str, item_path: str) -> dict:
    """Describe one directory under ``projects/data``."""
    display_name = f"Dataset - {item[:8]}..."
    try:
        log_data = _load_json_if_present(os.path.join(item_path, "processing_log.json"))
        # A status recorded in the log wins; otherwise the presence of a
        # "processed" entry marks the dataset as completed.
        if log_data.get("status"):
            status = log_data["status"]
        elif os.path.exists(os.path.join(item_path, "processed")):
            status = "completed"
        else:
            status = "active"
        return {
            "id": item,
            "name": display_name,
            "type": "dataset",
            "status": status,
            # .pkl files are excluded from the count.
            "file_count": _count_files(item_path, skip_suffix='.pkl'),
            "log_data": log_data,
            "created_at": os.path.getctime(item_path),
            "updated_at": os.path.getmtime(item_path),
        }
    except Exception as e:
        # Best effort: an unreadable dataset is still listed, flagged "unknown".
        logger.error(f"Error reading dataset {item}: {str(e)}")
        return _fallback_record(item, item_path, display_name, "dataset", "log_data")


@router.get("/api/v1/projects")
async def list_all_projects():
    """Get the complete project list.

    Scans ``projects/robot`` and ``projects/data`` (relative to the working
    directory) and returns one record per sub-directory.

    Returns:
        A dict with ``success``, ``message``, ``total_projects``,
        ``robot_projects``, ``datasets`` and ``projects`` (robots followed by
        datasets, kept for backward compatibility).

    Raises:
        HTTPException: status 500 when the listing fails as a whole.
    """
    try:
        robot_projects = [
            _robot_project_record(item, item_path)
            for item, item_path in _iter_subdirs("projects/robot")
        ]
        datasets = [
            _dataset_record(item, item_path)
            for item, item_path in _iter_subdirs("projects/data")
        ]
        all_projects = robot_projects + datasets
        return {
            "success": True,
            "message": "Project list retrieved successfully",
            "total_projects": len(all_projects),
            "robot_projects": robot_projects,
            "datasets": datasets,
            "projects": all_projects,  # Keep backward compatibility
        }
    except Exception as e:
        logger.error(f"Error listing projects: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to retrieve project list: {str(e)}") from e
@router.get("/api/v1/projects/robot")
async def list_robot_projects():
    """Get the robot project list.

    Delegates to ``list_all_projects`` and returns only its
    ``robot_projects`` entries.

    Returns:
        A dict with ``success``, ``message``, ``total_projects`` and
        ``projects``.

    Raises:
        HTTPException: status 500 when the listing fails.
    """
    try:
        response = await list_all_projects()
        robot_projects = response["robot_projects"]
        return {
            "success": True,
            "message": "Robot project list retrieved successfully",
            "total_projects": len(robot_projects),
            "projects": robot_projects,
        }
    except HTTPException:
        # list_all_projects has already logged the failure and built the
        # response; re-wrapping would nest "500: ..." inside the detail text.
        raise
    except Exception as e:
        logger.error(f"Error listing robot projects: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to retrieve robot project list: {str(e)}") from e
@router.get("/api/v1/projects/datasets")
async def list_datasets():
    """Get the dataset list.

    Delegates to ``list_all_projects`` and returns only its ``datasets``
    entries.

    Returns:
        A dict with ``success``, ``message``, ``total_projects`` and
        ``projects``.

    Raises:
        HTTPException: status 500 when the listing fails.
    """
    try:
        response = await list_all_projects()
        datasets = response["datasets"]
        return {
            "success": True,
            "message": "Dataset list retrieved successfully",
            "total_projects": len(datasets),
            "projects": datasets,
        }
    except HTTPException:
        # list_all_projects has already logged the failure and built the
        # response; re-wrapping would nest "500: ..." inside the detail text.
        raise
    except Exception as e:
        logger.error(f"Error listing datasets: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to retrieve dataset list: {str(e)}") from e