The local file-parsing pipeline (upload -> Huey async parse -> generate projects/data/.../document.txt) is no longer needed: RAG retrieval runs against the backend vector store and does not read the local parse output, so removing it has zero impact on existing bot Q&A.

- Delete task_queue/ (Huey queue, consumer, tasks, task status store)
- Delete parsing utils: dataset_manager, single_file_processor, data_merger, project_manager
- Delete db_manager.py (it only managed task_status.db)
- routes/files.py: keep only POST /api/v1/upload; drop all parse/queue/task endpoints
- routes/projects.py: drop the /tasks endpoint and the task_status import
- utils/__init__.py & api_models.py: remove exports/models for deleted modules and queue task models
- start_unified.py & start_all_optimized.sh: no longer launch the queue consumer
- Drop the huey dependency (keep redis)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
156 lines
6.4 KiB
Python
156 lines
6.4 KiB
Python
import os
|
|
import json
|
|
from typing import Optional
|
|
from fastapi import APIRouter, HTTPException
|
|
import logging
|
|
|
|
# Module logger; bound to the fixed logger name 'app' rather than this module's __name__.
logger = logging.getLogger('app')
|
|
|
|
# Router on which the project-listing endpoints in this module are registered.
router = APIRouter()
|
|
|
|
|
|
def _load_json_or_empty(path):
    """Return the parsed JSON stored at *path*, or ``{}`` when the file does not exist."""
    if not os.path.exists(path):
        return {}
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _count_files(directory, excluded_suffix=None):
    """Count files below *directory* recursively.

    Names ending in *excluded_suffix* are ignored when a suffix is given.
    ``os.walk`` yields nothing for a missing directory, so the result is 0
    in that case and no separate existence check is needed.
    """
    total = 0
    for _root, _dirs, filenames in os.walk(directory):
        if excluded_suffix is None:
            total += len(filenames)
        else:
            total += sum(1 for name in filenames if not name.endswith(excluded_suffix))
    return total


def _safe_timestamps(path):
    """Return ``(ctime, mtime)`` for *path*, or ``(None, None)`` if it cannot be stat'ed."""
    try:
        return os.path.getctime(path), os.path.getmtime(path)
    except OSError:
        return None, None


def _fallback_entry(item, item_path, name, kind):
    """Build the reduced entry reported for a directory whose metadata could not be read."""
    # The directory may be the very thing that failed (removed or unreadable),
    # so the timestamps must not raise here: one broken entry should not turn
    # the whole listing into a 500.
    created_at, updated_at = _safe_timestamps(item_path)
    return {
        "id": item,
        "name": name,
        "type": kind,
        "status": "unknown",
        "file_count": 0,
        "created_at": created_at,
        "updated_at": updated_at,
    }


def _scan_robot_projects(robot_dir):
    """Describe every sub-directory of *robot_dir* as a robot project entry."""
    projects = []
    if not os.path.exists(robot_dir):
        return projects

    for item in os.listdir(robot_dir):
        item_path = os.path.join(robot_dir, item)
        if not os.path.isdir(item_path):
            continue
        try:
            config_data = _load_json_or_empty(os.path.join(item_path, "robot_config.json"))
            projects.append({
                "id": item,
                "name": config_data.get("name", item),
                "type": "robot",
                "status": config_data.get("status", "active"),
                # Only files under the project's "datasets" folder are counted.
                "file_count": _count_files(os.path.join(item_path, "datasets")),
                "config": config_data,
                "created_at": os.path.getctime(item_path),
                "updated_at": os.path.getmtime(item_path),
            })
        except Exception as e:
            # Best effort: keep the project visible with status "unknown".
            logger.error(f"Error reading robot project {item}: {str(e)}")
            projects.append(_fallback_entry(item, item_path, item, "robot"))
    return projects


def _scan_datasets(data_dir):
    """Describe every sub-directory of *data_dir* as a dataset entry."""
    datasets = []
    if not os.path.exists(data_dir):
        return datasets

    for item in os.listdir(data_dir):
        item_path = os.path.join(data_dir, item)
        if not os.path.isdir(item_path):
            continue
        display_name = f"Dataset - {item[:8]}..."
        try:
            log_data = _load_json_or_empty(os.path.join(item_path, "processing_log.json"))
            # Every file in the dataset tree counts except '.pkl' files.
            file_count = _count_files(item_path, excluded_suffix='.pkl')

            # A non-empty status in the log wins; otherwise the presence of a
            # "processed" entry marks the dataset as completed.
            status = "active"
            if log_data.get("status"):
                status = log_data["status"]
            elif os.path.exists(os.path.join(item_path, "processed")):
                status = "completed"

            datasets.append({
                "id": item,
                "name": display_name,
                "type": "dataset",
                "status": status,
                "file_count": file_count,
                "log_data": log_data,
                "created_at": os.path.getctime(item_path),
                "updated_at": os.path.getmtime(item_path),
            })
        except Exception as e:
            # Best effort: keep the dataset visible with status "unknown".
            logger.error(f"Error reading dataset {item}: {str(e)}")
            datasets.append(_fallback_entry(item, item_path, display_name, "dataset"))
    return datasets


@router.get("/api/v1/projects")
async def list_all_projects():
    """Get the complete project list.

    Scans ``projects/robot`` and ``projects/data`` (relative to the working
    directory) and reports one entry per sub-directory.

    Returns:
        A dict with ``success``, ``message``, ``total_projects``,
        ``robot_projects``, ``datasets`` and ``projects`` (robot projects
        followed by datasets). Entries that could not be read are still
        listed with status ``"unknown"``, ``file_count`` 0 and, if the
        directory cannot be stat'ed, ``None`` timestamps.

    Raises:
        HTTPException: status 500 if the scan itself fails.
    """
    # NOTE(review): all filesystem access below is synchronous even though the
    # handler is declared async.
    try:
        robot_projects = _scan_robot_projects("projects/robot")
        datasets = _scan_datasets("projects/data")
        all_projects = robot_projects + datasets

        return {
            "success": True,
            "message": "Project list retrieved successfully",
            "total_projects": len(all_projects),
            "robot_projects": robot_projects,
            "datasets": datasets,
            "projects": all_projects,  # Keep backward compatibility
        }
    except Exception as e:
        logger.error(f"Error listing projects: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to retrieve project list: {str(e)}") from e
|
|
|
|
|
|
@router.get("/api/v1/projects/robot")
async def list_robot_projects():
    """Get the robot project list.

    Delegates to ``list_all_projects()`` and returns only its
    ``robot_projects`` entries.

    Returns:
        A dict with ``success``, ``message``, ``total_projects`` and
        ``projects``.

    Raises:
        HTTPException: the error raised by ``list_all_projects()`` is passed
            through unchanged; any other failure becomes a status 500.
    """
    try:
        response = await list_all_projects()
        robot_projects = response["robot_projects"]
        return {
            "success": True,
            "message": "Robot project list retrieved successfully",
            "total_projects": len(robot_projects),
            "projects": robot_projects,
        }
    except HTTPException:
        # Already logged and formatted by list_all_projects(); wrapping it
        # again would log the same failure twice and nest the detail message.
        raise
    except Exception as e:
        logger.error(f"Error listing robot projects: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to retrieve robot project list: {str(e)}") from e
|
|
|
|
|
|
@router.get("/api/v1/projects/datasets")
async def list_datasets():
    """Get the dataset list.

    Delegates to ``list_all_projects()`` and returns only its ``datasets``
    entries.

    Returns:
        A dict with ``success``, ``message``, ``total_projects`` and
        ``projects``.

    Raises:
        HTTPException: the error raised by ``list_all_projects()`` is passed
            through unchanged; any other failure becomes a status 500.
    """
    try:
        response = await list_all_projects()
        datasets = response["datasets"]
        return {
            "success": True,
            "message": "Dataset list retrieved successfully",
            "total_projects": len(datasets),
            "projects": datasets,
        }
    except HTTPException:
        # Already logged and formatted by list_all_projects(); wrapping it
        # again would log the same failure twice and nest the detail message.
        raise
    except Exception as e:
        logger.error(f"Error listing datasets: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to retrieve dataset list: {str(e)}") from e
|