qwen_agent/zip_project_handler.py
朱潮 425f3c5bb4 chore: replace Chinese comments and log messages with English
Convert all Chinese comments, docstrings, logger/print output,
HTTPException detail messages, and API response messages to English
across the entire codebase. Functional zh/ja localized strings
(e.g. prompt templates, timezone display names, date formats) are
preserved as-is.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-30 19:45:35 +08:00

195 lines
7.0 KiB
Python

#!/usr/bin/env python3
"""
ZIP project handler.
Responsible for downloading ZIP files from URLs and extracting them into project directories.
"""
import os
import hashlib
import zipfile
import requests
import tempfile
import logging
from typing import List, Optional
from urllib.parse import urlparse
from pathlib import Path
# Module-level logger used by every method in this file.
# It is looked up under the fixed name 'app' rather than this module's __name__.
logger = logging.getLogger('app')
class ZipProjectHandler:
    """Fetch ZIP archives (by URL or local path) and unpack them into project directories.

    Layout under ``projects_dir``:
        ``_cache/``            downloaded / copied ZIP files
        ``<unique_id>/``       project extracted for an explicit unique_id
        ``<url hash>/``        project extracted when no unique_id is given
    """

    # Network settings used by _download_file.
    _DOWNLOAD_TIMEOUT = 30
    _CHUNK_SIZE = 8192

    def __init__(self, projects_dir: str = "./projects"):
        """Create the projects root and its ``_cache`` subdirectory if missing.

        Args:
            projects_dir: Root directory for extracted projects; resolved to an
                absolute path.
        """
        self.projects_dir = Path(projects_dir).resolve()
        # parents=True so a nested, not-yet-existing root does not raise.
        self.projects_dir.mkdir(parents=True, exist_ok=True)
        self.cache_dir = self.projects_dir / "_cache"
        self.cache_dir.mkdir(exist_ok=True)

    @staticmethod
    def _is_url(location: str) -> bool:
        """Return True when ``location`` parses with both a scheme and a network location."""
        try:
            parsed = urlparse(location)
        except (ValueError, TypeError, AttributeError):
            return False
        return bool(parsed.scheme and parsed.netloc)

    @staticmethod
    def _remove_quietly(path) -> None:
        """Best-effort removal of a file; a missing or locked file is not an error here."""
        try:
            os.remove(str(path))
        except OSError:
            # Cleanup only: the caller has already reported the real failure.
            pass

    def _get_url_hash(self, url: str) -> str:
        """Return the first 16 hex characters of the MD5 of ``url`` (used as a cache key)."""
        return hashlib.md5(url.encode('utf-8')).hexdigest()[:16]

    def _is_valid_url_or_path(self, path: str) -> bool:
        """Return True if ``path`` looks like a URL or names an existing local path.

        An empty value is rejected explicitly: ``Path("")`` means the current
        directory and would otherwise be reported as existing.
        """
        if not path:
            return False
        if self._is_url(path):
            return True
        try:
            return Path(path).exists()
        except (OSError, ValueError, TypeError):
            return False

    def _download_file(self, url: str, local_path: str) -> bool:
        """Download ``url`` to ``local_path``.

        The body is streamed into ``<local_path>.part`` and moved into place only
        after the transfer completes, so a failed download never leaves a
        truncated file at ``local_path``.

        Returns:
            bool: True on success, False on any failure (the error is logged).
        """
        part_path = f"{local_path}.part"
        try:
            # The context manager closes the streamed response on every path.
            with requests.get(url, stream=True, timeout=self._DOWNLOAD_TIMEOUT) as response:
                response.raise_for_status()
                with open(part_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                        if chunk:
                            f.write(chunk)
            os.replace(part_path, local_path)
            return True
        except Exception as e:
            logger.error(f"Failed to download file: {e}")
            self._remove_quietly(part_path)
            return False

    def _copy_local_file(self, local_path: str, target_path: str) -> bool:
        """Copy a local file to ``target_path``.

        The copy goes to ``<target_path>.part`` first and is then moved into
        place, so an interrupted copy never leaves a partial file at
        ``target_path``.

        Returns:
            bool: True on success, False on any failure (the error is logged).
        """
        import shutil

        part_path = f"{target_path}.part"
        try:
            shutil.copy2(local_path, part_path)
            os.replace(part_path, target_path)
            return True
        except Exception as e:
            logger.error(f"Failed to copy local file: {e}")
            self._remove_quietly(part_path)
            return False

    def _extract_zip(self, zip_path: str, extract_to: str) -> bool:
        """Extract a ZIP file to the specified directory.

        Returns:
            bool: True on success, False on any failure (the error is logged).
            On failure, files extracted before the error may remain in
            ``extract_to``; the caller is responsible for cleaning them up.
        """
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
            return True
        except Exception as e:
            logger.error(f"Failed to extract ZIP file: {e}")
            return False

    def get_project_from_zip(self, zip_url: str, unique_id: Optional[str] = None) -> Optional[str]:
        """
        Get project data from a ZIP URL or local path

        Args:
            zip_url: URL of the ZIP file or local relative path
            unique_id: Optional unique identifier used as the folder name

        Returns:
            Optional[str]: Returns the project directory path on success, or None on failure
        """
        import shutil

        if not self._is_valid_url_or_path(zip_url):
            logger.error(f"Invalid URL or path: {zip_url}")
            return None

        url_hash = self._get_url_hash(zip_url)
        # Use unique_id as the directory name, or the URL hash if not provided.
        project_dir = self.projects_dir / (unique_id or url_hash)

        # Reject names such as "../x" or "/abs/path" that would place the
        # project outside the projects root. The check is lexical (normpath)
        # so symlinked project directories keep working.
        normalized_dir = Path(os.path.normpath(str(project_dir)))
        if self.projects_dir not in normalized_dir.parents:
            logger.error(f"Project directory escapes the projects root: {project_dir}")
            return None

        # Without unique_id an existing directory is a cache hit. With
        # unique_id the archive is always re-extracted.
        if not unique_id and project_dir.exists():
            logger.info(f"Using cached project directory: {project_dir}")
            return str(project_dir)

        # With unique_id, prefix the ZIP filename with it to avoid conflicts.
        zip_filename = f"{unique_id}_{url_hash}.zip" if unique_id else f"{url_hash}.zip"
        zip_path = self.cache_dir / zip_filename

        if zip_path.exists():
            logger.info(f"Using cached ZIP file: {zip_path}")
        elif self._is_url(zip_url):
            logger.info(f"Downloading ZIP file: {zip_url}")
            if not self._download_file(zip_url, str(zip_path)):
                return None
        else:
            logger.info(f"Copying local ZIP file: {zip_url}")
            # Resolve the relative path
            local_path = Path(zip_url).resolve()
            if not self._copy_local_file(str(local_path), str(zip_path)):
                return None

        # Extract into the project directory
        logger.info(f"Extracting ZIP file to: {project_dir}")
        dir_preexisted = project_dir.exists()
        if not self._extract_zip(str(zip_path), str(project_dir)):
            # Drop the archive so the next call fetches it again instead of
            # retrying the same bad file.
            self._remove_quietly(zip_path)
            # Remove a half-extracted directory we created ourselves; otherwise
            # the next call without unique_id would return it as a cache hit.
            if not dir_preexisted:
                shutil.rmtree(str(project_dir), ignore_errors=True)
            return None

        logger.info(f"Project is ready: {project_dir}")
        return str(project_dir)

    def collect_document_files(self, project_dir: str) -> List[str]:
        """
        Collect all document.txt files under the project directory

        Args:
            project_dir: Project directory path

        Returns:
            List[str]: Full path list of all document.txt files (empty if the
            directory does not exist)
        """
        project_path = Path(project_dir)
        if not project_path.exists():
            logger.error(f"Project directory does not exist: {project_dir}")
            return []

        # Recursively search for all document.txt files
        document_files = [
            str(file_path)
            for file_path in project_path.rglob("document.txt")
            if file_path.is_file()
        ]

        logger.info(f"Found {len(document_files)} document.txt files in project directory {project_dir}")
        for file_path in document_files[:5]:  # Only print the first 5 file paths as examples
            logger.info(f" - {file_path}")
        if len(document_files) > 5:
            logger.info(f" ... and {len(document_files) - 5} more files")
        return document_files

    def cleanup_cache(self):
        """Delete everything in the cache directory and recreate it empty.

        Failures are logged, not raised.
        """
        try:
            import shutil

            if self.cache_dir.exists():
                shutil.rmtree(self.cache_dir)
            # Always recreate so later downloads have a directory to write to.
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            logger.info("Cache cleanup completed")
        except Exception as e:
            logger.error(f"Failed to clean up cache: {e}")
# Global ZIP project handler instance.
# It is built with the default projects_dir ("./projects"), so importing this
# module creates that directory and its "_cache" subdirectory if they are missing.
zip_handler = ZipProjectHandler()