The local file-parsing pipeline (upload -> Huey async parse -> generate projects/data/.../document.txt) is no longer needed: RAG retrieval runs against the backend vector store and does not read the local parse output, so removing this has zero impact on existing bot Q&A. - Delete task_queue/ (Huey queue, consumer, tasks, task status store) - Delete parsing utils: dataset_manager, single_file_processor, data_merger, project_manager - Delete db_manager.py (only managed task_status.db) - routes/files.py: keep only POST /api/v1/upload; drop all parse/queue/task endpoints - routes/projects.py: drop /tasks endpoint and task_status import - utils/__init__.py & api_models.py: remove exports/models for deleted modules and queue task models - start_unified.py & start_all_optimized.sh: no longer launch the queue consumer - Drop huey dependency (keep redis) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
299 lines
9.2 KiB
Python
299 lines
9.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
API data models and response schemas.
|
|
"""
|
|
|
|
from typing import Dict, List, Optional, Any, AsyncGenerator
|
|
from pydantic import BaseModel, Field, field_validator, ConfigDict
|
|
|
|
class Message(BaseModel):
    """One chat message, made of a ``role`` string and a ``content`` string.

    Both fields are required and neither is validated beyond being a string.
    """

    role: str
    content: str
|
|
|
|
|
|
class DatasetRequest(BaseModel):
    """Dataset request payload; every field is optional and defaults to ``None``.

    ``files``, when supplied, must be a dict whose keys are strings and whose
    values are lists of strings. ``validate_files`` enforces that shape before
    Pydantic's own type coercion runs.
    """

    system_prompt: Optional[str] = None
    mcp_settings: Optional[List[Dict]] = None
    files: Optional[Dict[str, List[str]]] = Field(
        default=None,
        description="Files organized by key groups. Each key maps to a list of file paths (supports zip files)",
    )
    unique_id: Optional[str] = None

    @field_validator("files", mode="before")
    @classmethod
    def validate_files(cls, v):
        """Check that ``files`` is ``None`` or a ``{str: [str, ...]}`` dict.

        Returns:
            The input unchanged (``None`` stays ``None``).

        Raises:
            ValueError: if ``v`` is not a dict, a key is not a string, a value
                is not a list, or a list element is not a string.
        """
        # An absent value is allowed and passed through.
        if v is None:
            return None

        # Reject every non-dict input up front.
        if not isinstance(v, dict):
            raise ValueError(f"Files must be a dict with key groups, got {type(v)}")

        for group, paths in v.items():
            if not isinstance(group, str):
                raise ValueError(f"Key in files dict must be string, got {type(group)}")
            if not isinstance(paths, list):
                raise ValueError(f"Value in files dict must be list, got {type(paths)} for key '{group}'")
            for path in paths:
                if not isinstance(path, str):
                    raise ValueError(f"File paths must be strings, got {type(path)} in key '{group}'")

        return v
|
|
|
|
|
|
class ChatRequest(BaseModel):
    """Chat request body.

    ``messages`` and ``bot_id`` are required; every other field has a default.
    Keys that are not declared here are accepted and kept on the instance
    because the model is configured with ``extra='allow'``.
    """

    messages: List[Message]
    model: str = "qwen3-next"
    model_server: str = ""
    dataset_ids: Optional[List[str]] = None
    bot_id: str
    stream: Optional[bool] = False
    language: Optional[str] = "zh"
    tool_response: Optional[bool] = True
    system_prompt: Optional[str] = ""
    mcp_settings: Optional[List[Dict]] = None
    user_identifier: Optional[str] = ""
    session_id: Optional[str] = None
    enable_thinking: Optional[bool] = False
    skills: Optional[List[str]] = None
    enable_memory: Optional[bool] = False
    enable_self_knowledge: Optional[bool] = False
    shell_env: Optional[Dict[str, str]] = None

    # extra='allow' keeps undeclared request keys instead of rejecting them.
    # protected_namespaces=() stops Pydantic v2 (2.0-2.9) from emitting a
    # UserWarning at class creation because the `model_server` field name
    # starts with the reserved "model_" prefix. Validation is unaffected.
    model_config = ConfigDict(extra='allow', protected_namespaces=())
|
|
|
|
|
|
class ChatRequestV2(BaseModel):
    """Chat request body (v2).

    Only ``messages`` and ``bot_id`` are required. Undeclared keys are
    accepted because the model is configured with ``extra="allow"``.
    """

    messages: List[Message]
    stream: Optional[bool] = False
    tool_response: Optional[bool] = True
    bot_id: str
    language: Optional[str] = "zh"
    user_identifier: Optional[str] = ""
    session_id: Optional[str] = None

    # Keep unknown request keys rather than rejecting the payload.
    model_config = ConfigDict(extra="allow")
|
|
|
|
|
|
class FileProcessRequest(BaseModel):
    """File-processing request payload.

    ``unique_id`` is required; the remaining declared fields default to
    ``None``. Undeclared keys are accepted (``extra="allow"``). ``files``,
    when supplied, must be a dict of string keys to lists of strings, which
    ``validate_files`` checks before Pydantic's own coercion runs.
    """

    unique_id: str
    files: Optional[Dict[str, List[str]]] = Field(
        default=None,
        description="Files organized by key groups. Each key maps to a list of file paths (supports zip files)",
    )
    system_prompt: Optional[str] = None
    mcp_settings: Optional[List[Dict]] = None

    # Keep unknown request keys rather than rejecting the payload.
    model_config = ConfigDict(extra="allow")

    @field_validator("files", mode="before")
    @classmethod
    def validate_files(cls, v):
        """Check that ``files`` is ``None`` or a ``{str: [str, ...]}`` dict.

        Returns:
            The input unchanged (``None`` stays ``None``).

        Raises:
            ValueError: if ``v`` is not a dict, a key is not a string, a value
                is not a list, or a list element is not a string.
        """
        # An absent value is allowed and passed through.
        if v is None:
            return None

        # Reject every non-dict input up front.
        if not isinstance(v, dict):
            raise ValueError(f"Files must be a dict with key groups, got {type(v)}")

        for group, paths in v.items():
            if not isinstance(group, str):
                raise ValueError(f"Key in files dict must be string, got {type(group)}")
            if not isinstance(paths, list):
                raise ValueError(f"Value in files dict must be list, got {type(paths)} for key '{group}'")
            for path in paths:
                if not isinstance(path, str):
                    raise ValueError(f"File paths must be strings, got {type(path)} in key '{group}'")

        return v
|
|
|
|
|
|
class DatasetResponse(BaseModel):
    """Dataset operation result.

    ``success`` and ``message`` are required; ``unique_id`` and
    ``dataset_structure`` are optional strings that default to ``None``.
    """

    success: bool
    message: str
    unique_id: Optional[str] = None
    dataset_structure: Optional[str] = None
|
|
|
|
|
|
class ChatCompletionResponse(BaseModel):
    """Chat completion response envelope.

    ``object`` defaults to the literal ``"chat.completion"`` and ``usage`` is
    optional; ``id``, ``created``, ``model`` and ``choices`` are required.
    """

    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[Dict[str, Any]]
    usage: Optional[Dict[str, int]] = None
|
|
|
|
|
|
class ChatResponse(BaseModel):
    """Chat response holding a required ``choices`` list and optional ``usage`` dict."""

    choices: List[Dict]
    usage: Optional[Dict] = None
|
|
|
|
|
|
class FileProcessResponse(BaseModel):
    """File-processing result; all four fields are required."""

    success: bool
    message: str
    unique_id: str
    processed_files: List[str]
|
|
|
|
|
|
class ErrorResponse(BaseModel):
    """Error envelope whose single ``error`` field holds a free-form dict."""

    error: Dict[str, Any]

    @classmethod
    def create(cls, message: str, error_type: str = "invalid_request_error", code: Optional[str] = None):
        """Build an instance from a message, an error type and an optional code.

        Args:
            message: Stored under the ``"message"`` key.
            error_type: Stored under the ``"type"`` key.
            code: Stored under the ``"code"`` key only when truthy; ``None``
                and the empty string both leave the key out.

        Returns:
            A new instance of ``cls`` wrapping the assembled dict.
        """
        payload = dict(message=message, type=error_type)
        if code:
            payload.update(code=code)
        return cls(error=payload)
|
|
|
|
|
|
class HealthCheckResponse(BaseModel):
    """Health-check payload.

    ``timestamp`` is required; ``status`` defaults to ``"healthy"`` and
    ``version`` defaults to ``"1.0.0"``.
    """

    status: str = "healthy"
    timestamp: str
    version: str = "1.0.0"
|
|
|
|
|
|
class SystemStatusResponse(BaseModel):
    """System status payload; every field is required."""

    status: str
    projects_count: int
    total_projects: List[str]
    active_projects: List[str]
    system_info: Dict[str, Any]
|
|
|
|
|
|
class CacheStatusResponse(BaseModel):
    """Cache status payload: a list of project names plus a free-form info dict."""

    cached_projects: List[str]
    cache_info: Dict[str, Any]
|
|
|
|
|
|
class ProjectStatusResponse(BaseModel):
    """Status payload for the project identified by ``unique_id``.

    ``project_path``, ``dataset_structure`` and ``error`` are optional and
    default to ``None``; every other field is required.
    """

    unique_id: str
    project_exists: bool
    project_path: Optional[str] = None

    # Processed-file and document-file listings with their counts.
    processed_files_count: int
    processed_files: Dict[str, Dict]
    document_files_count: int
    document_files: List[str]

    # Presence flags.
    has_system_prompt: bool
    has_mcp_settings: bool
    readme_exists: bool
    log_file_exists: bool

    dataset_structure: Optional[str] = None
    error: Optional[str] = None
|
|
|
|
|
|
class ProjectListResponse(BaseModel):
    """Project listing: a list of project names and an integer ``count``."""

    projects: List[str]
    count: int
|
|
|
|
|
|
class ProjectStatsResponse(BaseModel):
    """Statistics payload for the project identified by ``unique_id``.

    Every field is required.
    """

    unique_id: str

    # Totals.
    total_processed_files: int
    total_document_files: int
    total_document_size: int
    total_document_size_mb: float

    # Presence flags.
    has_system_prompt: bool
    has_mcp_settings: bool
    has_readme: bool

    # Per-file detail records; their inner schema is not fixed by this model.
    document_files_detail: List[Dict[str, Any]]
    embedding_files_count: int
    embedding_files_detail: List[Dict[str, Any]]
|
|
|
|
|
|
class ProjectActionResponse(BaseModel):
    """Result of an action on a project; all four fields are required."""

    success: bool
    message: str
    unique_id: str
    action: str
|
|
|
|
|
|
# Utility functions for creating responses
|
|
def create_success_response(message: str, **kwargs) -> Dict[str, Any]:
    """Return a success dict: ``success=True``, ``message``, then any extras.

    Args:
        message: Stored under the ``"message"`` key.
        **kwargs: Merged in after the two standard keys, so a keyword named
            ``success`` replaces the default ``True``.

    Returns:
        A new dict with ``"success"`` and ``"message"`` first, followed by
        the extra keys in the order given.
    """
    payload: Dict[str, Any] = {"success": True, "message": message}
    payload.update(kwargs)
    return payload
|
|
|
|
|
|
def create_error_response(message: str, error_type: str = "error", **kwargs) -> Dict[str, Any]:
    """Return an error dict: ``success=False``, ``error``, ``message``, extras.

    Args:
        message: Stored under the ``"message"`` key.
        error_type: Stored under the ``"error"`` key.
        **kwargs: Merged in after the three standard keys, so keywords named
            ``success`` or ``error`` replace the standard values.

    Returns:
        A new dict with ``"success"``, ``"error"`` and ``"message"`` first,
        followed by the extra keys in the order given.
    """
    payload: Dict[str, Any] = {
        "success": False,
        "error": error_type,
        "message": message,
    }
    payload.update(kwargs)
    return payload
|
|
|
|
|
|
def create_chat_response(
    messages: List[Message],
    model: str,
    content: str,
    usage: Optional[Dict[str, int]] = None
) -> Dict[str, Any]:
    """Build a ``chat.completion`` dict wrapping one assistant message.

    Args:
        messages: Accepted for interface compatibility; the body does not
            read it.
        model: Copied into the ``"model"`` key.
        content: Text of the single assistant message in ``"choices"``.
        usage: Token counts to report. Any falsy value (``None`` or an empty
            dict) is replaced with all-zero counts.

    Returns:
        A dict with the keys ``id``, ``object``, ``created``, ``model``,
        ``choices`` and ``usage``.
    """
    import time
    import uuid

    # The id is "chatcmpl-" plus the first 8 hex characters of a random UUID.
    response_id = "chatcmpl-" + uuid.uuid4().hex[:8]
    created_at = int(time.time())

    assistant_choice = {
        "index": 0,
        "message": {"role": "assistant", "content": content},
        "finish_reason": "stop",
    }

    if usage:
        usage_block = usage
    else:
        usage_block = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }

    return {
        "id": response_id,
        "object": "chat.completion",
        "created": created_at,
        "model": model,
        "choices": [assistant_choice],
        "usage": usage_block,
    }
|
|
|
|
|
|
# ============================================================================
|
|
# Models related to chat history queries
|
|
# ============================================================================
|
|
|
|
class ChatHistoryRequest(BaseModel):
    """Chat history query request.

    ``session_id`` is required, ``last_message_id`` defaults to ``None`` and
    ``limit`` defaults to 20, constrained to the range 1-100 inclusive.
    """

    session_id: str = Field(
        ...,
        description="Session ID (thread_id)",
    )
    last_message_id: Optional[str] = Field(
        default=None,
        description="ID of the previous message, used to paginate and fetch earlier messages",
    )
    limit: int = Field(
        default=20,
        ge=1,
        le=100,
        description="Maximum number of messages to return per query",
    )
|
|
|
|
|
|
class ChatHistoryMessage(BaseModel):
    """Chat history message.

    ``id``, ``role`` and ``content`` are required; ``timestamp`` is an
    optional string that defaults to ``None``.
    """

    id: str = Field(
        ...,
        description="Unique message ID",
    )
    role: str = Field(
        ...,
        description="Message role: user or assistant",
    )
    content: str = Field(
        ...,
        description="Message content",
    )
    timestamp: Optional[str] = Field(
        default=None,
        description="Message timestamp (ISO 8601)",
    )
|
|
|
|
|
|
class ChatHistoryResponse(BaseModel):
    """Chat history query response: a page of messages plus a ``has_more`` flag.

    Both fields are required.
    """

    messages: List[ChatHistoryMessage] = Field(
        ...,
        description="List of messages in reverse chronological order",
    )
    has_more: bool = Field(
        ...,
        description="Whether more history messages are available",
    )
|
|
|
|
|
|
class BatchSaveChatRequest(BaseModel):
    """Batch save chat records request.

    ``session_id`` and ``messages`` are required; ``bot_id`` defaults to
    ``None``.
    """

    session_id: str = Field(
        ...,
        description="Session ID (thread_id)",
    )
    messages: List[Message] = Field(
        ...,
        description="List of messages to save, supporting user and assistant roles",
    )
    bot_id: Optional[str] = Field(
        default=None,
        description="Bot ID",
    )
|
|
|
|
|
|
class BatchSaveChatResponse(BaseModel):
    """Batch save chat records response; every field is required."""

    success: bool = Field(
        ...,
        description="Whether the operation succeeded",
    )
    message: str = Field(
        ...,
        description="Response message",
    )
    session_id: str = Field(
        ...,
        description="Session ID",
    )
    saved_count: int = Field(
        ...,
        description="Number of messages saved successfully",
    )
    message_ids: List[str] = Field(
        ...,
        description="List of saved message IDs",
    )
|