229 lines
6.9 KiB
Python
229 lines
6.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Queue manager for handling file processing queues.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import time
|
|
import logging
|
|
from typing import Dict, List, Optional, Any
|
|
from huey import Huey
|
|
from huey.api import Task
|
|
from datetime import datetime, timedelta
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger('app')
|
|
|
|
from .config import huey
|
|
from .tasks import process_file_async, process_multiple_files_async, process_zip_file_async, cleanup_processed_files
|
|
|
|
|
|
class QueueManager:
    """Queue manager for file processing tasks.

    Wraps the module-level ``huey`` instance and the task functions imported
    from ``.tasks``: the ``enqueue_*`` methods submit work and return task
    IDs, while ``get_task_status`` / ``get_queue_stats`` inspect the queue.
    """

    def __init__(self):
        self.huey = huey
        logger.info("Queue manager initialized with Redis backend")

    @staticmethod
    def _dispatch(task_func, args: tuple, delay: int):
        """Enqueue ``task_func`` with ``args``, deferring it when ``delay`` > 0."""
        if delay > 0:
            return task_func.schedule(args=args, delay=timedelta(seconds=delay))
        return task_func(*args)

    @staticmethod
    def _contains_task(tasks, task_id: str) -> bool:
        """Return True when any object in ``tasks`` carries the given ``id``."""
        return any(hasattr(task, "id") and task.id == task_id for task in tasks)

    def _lookup_result(self, task_id: str) -> Any:
        """Fetch a stored task result without consuming it; None when unavailable."""
        try:
            lookup = getattr(self.huey, "result", None)
            if not callable(lookup):
                return None
            # preserve=True keeps the value in the result store. With the
            # default (preserve=False) the first read deletes the result, so
            # a second status poll for the same task would see nothing.
            return lookup(task_id, preserve=True)
        except Exception as exc:
            # Best-effort: a failed lookup is treated as "no result" so the
            # caller can still fall back to the pending/scheduled checks.
            logger.debug("Result lookup failed for task %s: %s", task_id, exc)
            return None

    def enqueue_file(
        self,
        project_id: str,
        file_path: str,
        original_filename: Optional[str] = None,
        delay: int = 0,
    ) -> str:
        """
        Add a file to the processing queue.

        Args:
            project_id: Project ID
            file_path: File path
            original_filename: Original filename
            delay: Delay before execution in seconds; values <= 0 enqueue
                the task immediately

        Returns:
            Task ID
        """
        task = self._dispatch(
            process_file_async,
            (project_id, file_path, original_filename),
            delay,
        )
        logger.info("File queued for processing: %s, task ID: %s", file_path, task.id)
        return task.id

    def enqueue_multiple_files(
        self,
        project_id: str,
        file_paths: List[str],
        original_filenames: Optional[List[str]] = None,
        delay: int = 0,
    ) -> List[str]:
        """
        Add multiple files to the processing queue.

        Args:
            project_id: Project ID
            file_paths: List of file paths
            original_filenames: List of original filenames
            delay: Delay before execution in seconds; values <= 0 enqueue
                the task immediately

        Returns:
            List of task IDs. The whole batch is submitted as one task, so
            the list always holds exactly one ID.
        """
        task = self._dispatch(
            process_multiple_files_async,
            (project_id, file_paths, original_filenames),
            delay,
        )
        logger.info(
            "Batch files queued for processing: %s files, task ID: %s",
            len(file_paths),
            task.id,
        )
        return [task.id]

    def enqueue_zip_file(
        self,
        project_id: str,
        zip_path: str,
        extract_to: Optional[str] = None,
        delay: int = 0,
    ) -> str:
        """
        Add a zip file to the processing queue.

        Args:
            project_id: Project ID
            zip_path: Path to the zip file
            extract_to: Extraction target directory
            delay: Delay before execution in seconds; values <= 0 enqueue
                the task immediately

        Returns:
            Task ID
        """
        task = self._dispatch(
            process_zip_file_async,
            (project_id, zip_path, extract_to),
            delay,
        )
        logger.info("Zip file queued for processing: %s, task ID: %s", zip_path, task.id)
        return task.id

    def get_task_status(self, task_id: str) -> Dict[str, Any]:
        """
        Get task status.

        Checks, in order, the result store, the pending queue and the
        scheduled queue. Each check is best-effort: a failing check is
        skipped, so this method does not raise on backend errors.

        Args:
            task_id: Task ID

        Returns:
            Dict with "task_id" and "status", where status is one of
            "complete" (plus "result"), "pending", "scheduled" or
            "unknown" (plus "message"). A task whose stored result is None
            cannot be told apart from one with no result and is therefore
            not reported as "complete".
        """
        result = self._lookup_result(task_id)
        if result is not None:
            return {
                "task_id": task_id,
                "status": "complete",
                "result": result,
            }

        # The status label doubles as the name of the huey accessor to call.
        for status in ("pending", "scheduled"):
            try:
                if self._contains_task(getattr(self.huey, status)(), task_id):
                    return {
                        "task_id": task_id,
                        "status": status,
                    }
            except Exception as exc:
                logger.debug(
                    "Could not inspect %s tasks for task %s: %s", status, task_id, exc
                )

        # Not found anywhere: it may not exist, or it completed and its
        # result has since been cleaned up.
        return {
            "task_id": task_id,
            "status": "unknown",
            "message": "Task status is unknown; it may already be complete or may not exist",
        }

    def get_queue_stats(self) -> Dict[str, Any]:
        """
        Get queue statistics.

        Only the pending and scheduled counts are measured; the running,
        completed and error counters and "recent_tasks" are always returned
        at their initial values. A count that cannot be read is logged and
        left at 0.

        Returns:
            Queue statistics information
        """
        stats = {
            "total_tasks": 0,
            "pending_tasks": 0,
            "running_tasks": 0,
            "completed_tasks": 0,
            "error_tasks": 0,
            "scheduled_tasks": 0,
            "recent_tasks": [],
            "queue_backend": "redis",
        }

        # The kind doubles as the huey accessor name and the stats key prefix.
        for kind in ("pending", "scheduled"):
            try:
                count = len(list(getattr(self.huey, kind)()))
            except Exception as e:
                logger.error(f"Failed to get {kind} tasks: {e}")
                continue
            stats[f"{kind}_tasks"] = count
            stats["total_tasks"] += count

        return stats
|
|
|
|
|
|
# Global singleton instance, created when this module is imported so that
# importers can share one QueueManager via `queue_manager`.
queue_manager = QueueManager()
|