Convert all Chinese comments, docstrings, logger/print output, HTTPException detail messages, and API response messages to English across the entire codebase. Functional zh/ja localized strings (e.g. prompt templates, timezone display names, date formats) are preserved as-is. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
297 lines
9.8 KiB
Python
297 lines
9.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Async file operations - provides efficient async file read/write functionality.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import asyncio
|
|
import aiofiles
|
|
import aiofiles.os
|
|
import logging
|
|
from typing import Dict, List, Optional, Any
|
|
from pathlib import Path
|
|
import weakref
|
|
import threading
|
|
import time
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
# Module-wide logger; every message from this file goes to the 'app' logger.
logger = logging.getLogger('app')
|
|
|
|
|
|
class AsyncFileCache:
    """Async file cache manager.

    File contents are kept in memory, keyed by absolute path, together with
    the time they were stored. A cached entry is served while it is younger
    than ``ttl`` seconds. When the cache is full, the entry with the oldest
    timestamp is evicted. Cache hits do not refresh the timestamp, so this is
    "oldest stored first" rather than a true LRU policy.
    """

    def __init__(self, cache_size: int = 1000, ttl: int = 300):
        """
        Initialize the file cache.

        Args:
            cache_size: Maximum number of cached files; a value <= 0 disables caching
            ttl: Cache TTL (seconds)
        """
        self.cache_size = cache_size
        self.ttl = ttl
        self._cache = {}  # {abs_path: (content, timestamp)}
        self._lock = asyncio.Lock()
        self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="async_file_io")
        # The event loop only keeps weak references to tasks, so invalidation
        # tasks are held here until they finish.
        self._pending_tasks = set()

    def _store(self, abs_path: str, content: str) -> None:
        """Insert or refresh a cache entry; the caller must hold ``self._lock``.

        Evicts the entries with the oldest timestamps until there is room.
        Refreshing a key that is already cached never evicts anything.
        """
        if self.cache_size <= 0:
            # Caching disabled: never store (and never call min() on an empty dict).
            return
        if abs_path not in self._cache:
            while len(self._cache) >= self.cache_size:
                oldest_key = min(self._cache, key=lambda k: self._cache[k][1])
                del self._cache[oldest_key]
        self._cache[abs_path] = (content, time.time())

    async def read_file(self, file_path: str, encoding: str = 'utf-8') -> str:
        """Async read file content with caching.

        Args:
            file_path: Path of the file to read (made absolute for the cache key)
            encoding: Text encoding used to decode the file

        Returns:
            The file content, or "" when the file does not exist or cannot be
            read. Failed reads are logged and are not cached.
        """
        abs_path = os.path.abspath(file_path)

        # NOTE: the lock is held for the whole operation, so reads through one
        # cache instance run one at a time.
        async with self._lock:
            # Check cache
            cached = self._cache.get(abs_path)
            if cached is not None:
                content, timestamp = cached
                if time.time() - timestamp < self.ttl:
                    return content

            # Use thread pool for async file read
            loop = asyncio.get_running_loop()
            try:
                # Check if file exists
                exists = await loop.run_in_executor(
                    self._executor, os.path.exists, abs_path
                )
                if not exists:
                    return ""

                # Read file content
                content = await loop.run_in_executor(
                    self._executor, self._read_text_file, abs_path, encoding
                )
            except Exception as e:
                logger.error(f"Error reading file {abs_path}: {e}")
                return ""

            self._store(abs_path, content)
            return content

    def _read_text_file(self, file_path: str, encoding: str) -> str:
        """Read a text file synchronously in the thread pool.

        Raises:
            OSError, UnicodeError, LookupError: propagated to the caller so that
                ``read_file`` can log the failure instead of caching "".
        """
        with open(file_path, 'r', encoding=encoding) as f:
            return f.read()

    async def read_json(self, file_path: str) -> Dict[str, Any]:
        """Async read a JSON file.

        Returns:
            The parsed JSON value, or {} when the file is missing, blank or
            not valid JSON (parse errors are logged).
        """
        content = await self.read_file(file_path)
        if not content.strip():
            return {}

        try:
            return json.loads(content)
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON from {file_path}: {e}")
            return {}

    async def write_file(self, file_path: str, content: str, encoding: str = 'utf-8'):
        """Async write file content and store it in the cache.

        Parent directories are created when missing. Errors from creating the
        directory or writing the file propagate to the caller.
        """
        abs_path = os.path.abspath(file_path)

        # Ensure directory exists
        dir_path = os.path.dirname(abs_path)
        if dir_path:
            await aiofiles.os.makedirs(dir_path, exist_ok=True)

        # Use aiofiles for async write; open the same absolute path that is
        # used for the directory and the cache key.
        async with aiofiles.open(abs_path, 'w', encoding=encoding) as f:
            await f.write(content)

        # Update cache (respects cache_size, same as reads)
        async with self._lock:
            self._store(abs_path, content)

    async def write_json(self, file_path: str, data: Dict[str, Any], indent: int = 2):
        """Async write a JSON file (non-ASCII characters are written as-is)."""
        content = json.dumps(data, ensure_ascii=False, indent=indent)
        await self.write_file(file_path, content)

    async def exists(self, file_path: str) -> bool:
        """Async check if a file exists."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self._executor, os.path.exists, file_path
        )

    async def getmtime(self, file_path: str) -> float:
        """Async get file modification time; returns 0.0 on OSError."""
        loop = asyncio.get_running_loop()
        try:
            return await loop.run_in_executor(
                self._executor, os.path.getmtime, file_path
            )
        except OSError:
            return 0.0

    def invalidate_cache(self, file_path: Optional[str] = None):
        """Invalidate cache entries.

        Args:
            file_path: File whose entry should be dropped; when omitted (or
                empty) the whole cache is cleared.

        With a running event loop, the invalidation is scheduled as a task
        that takes the cache lock. Without one (plain synchronous code), the
        entry is removed immediately.
        """
        abs_path = os.path.abspath(file_path) if file_path else None

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No running loop in this thread: create_task() would raise, so
            # update the dict directly.
            if abs_path is None:
                self._cache.clear()
            else:
                self._cache.pop(abs_path, None)
            return

        if abs_path is None:
            coro = self._clear_all_cache()
        else:
            coro = self._invalidate_single(abs_path)
        task = asyncio.create_task(coro)
        # Keep a strong reference until the task completes.
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)

    async def _invalidate_single(self, file_path: str):
        """Invalidate cache for a single file (expects an absolute path)."""
        async with self._lock:
            self._cache.pop(file_path, None)

    async def _clear_all_cache(self):
        """Clear all cache entries."""
        async with self._lock:
            self._cache.clear()
|
|
|
|
|
|
# Process-wide AsyncFileCache singleton; stays None until first requested or
# explicitly initialized.
_global_file_cache: Optional[AsyncFileCache] = None
# Guards creation/replacement of the singleton above.
_cache_lock = threading.Lock()
|
|
|
|
|
|
def get_global_file_cache() -> AsyncFileCache:
    """Return the shared file cache, creating it with default settings on first use."""
    global _global_file_cache
    # Fast path: already created, no need to take the lock.
    if _global_file_cache is not None:
        return _global_file_cache
    with _cache_lock:
        # Re-check under the lock in case another thread created it meanwhile.
        if _global_file_cache is None:
            _global_file_cache = AsyncFileCache()
    return _global_file_cache
|
|
|
|
|
|
def init_global_file_cache(cache_size: int = 1000, ttl: int = 300) -> AsyncFileCache:
    """Replace the shared file cache with a new one built from the given settings.

    Args:
        cache_size: Maximum number of cached files
        ttl: Cache TTL (seconds)

    Returns:
        The newly installed cache instance.
    """
    global _global_file_cache
    with _cache_lock:
        fresh_cache = AsyncFileCache(cache_size, ttl)
        _global_file_cache = fresh_cache
        return fresh_cache
|
|
|
|
|
|
async def async_read_file(file_path: str, encoding: str = 'utf-8') -> str:
    """Read a text file through the shared file cache."""
    return await get_global_file_cache().read_file(file_path, encoding)
|
|
|
|
|
|
async def async_read_json(file_path: str) -> Dict[str, Any]:
    """Read and parse a JSON file through the shared file cache."""
    return await get_global_file_cache().read_json(file_path)
|
|
|
|
|
|
async def async_write_file(file_path: str, content: str, encoding: str = 'utf-8'):
    """Write a text file through the shared file cache."""
    shared_cache = get_global_file_cache()
    await shared_cache.write_file(file_path, content, encoding)
|
|
|
|
|
|
async def async_write_json(file_path: str, data: Dict[str, Any], indent: int = 2):
    """Serialize ``data`` to JSON and write it through the shared file cache."""
    shared_cache = get_global_file_cache()
    await shared_cache.write_json(file_path, data, indent)
|
|
|
|
|
|
async def async_file_exists(file_path: str) -> bool:
    """Check whether a path exists, using the shared file cache's thread pool."""
    return await get_global_file_cache().exists(file_path)
|
|
|
|
|
|
async def async_get_file_mtime(file_path: str) -> float:
    """Return a file's modification time via the shared file cache."""
    return await get_global_file_cache().getmtime(file_path)
|
|
|
|
|
|
class ParallelFileReader:
    """Parallel file reader backed by its own thread pool."""

    def __init__(self, max_workers: int = 8):
        """
        Initialize the parallel reader.

        Args:
            max_workers: Maximum number of worker threads
        """
        self.max_workers = max_workers
        self._executor = ThreadPoolExecutor(max_workers=max_workers,
                                            thread_name_prefix="parallel_file_reader")

    async def read_multiple_files(self, file_paths: List[str],
                                  encoding: str = 'utf-8') -> Dict[str, str]:
        """Read multiple files in parallel.

        Args:
            file_paths: Paths to read
            encoding: Text encoding used to decode every file

        Returns:
            A dict mapping each given path to its content. Missing files map
            to "" silently; files that fail to read or decode are logged and
            also map to "".
        """
        if not file_paths:
            return {}

        loop = asyncio.get_running_loop()

        # Submit every read up front so they run concurrently in the pool.
        pending = [
            (file_path, loop.run_in_executor(
                self._executor, self._read_text_file_sync, file_path, encoding
            ))
            for file_path in file_paths
        ]

        # Collect results; a failing file does not affect the others.
        results = {}
        for file_path, future in pending:
            try:
                results[file_path] = await future
            except Exception as e:
                logger.error(f"Error reading {file_path}: {e}")
                results[file_path] = ""

        return results

    def _read_text_file_sync(self, file_path: str, encoding: str) -> str:
        """Read a text file synchronously in the thread pool.

        Returns:
            The file content, or "" when the file does not exist.

        Raises:
            OSError, UnicodeError, LookupError: for any other read/decoding
                failure, so that ``read_multiple_files`` can log it.
        """
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except (FileNotFoundError, NotADirectoryError):
            # A missing file is an expected case, not an error.
            return ""

    async def read_multiple_json(self, file_paths: List[str]) -> Dict[str, Dict[str, Any]]:
        """Read multiple JSON files in parallel.

        Returns:
            A dict mapping each given path to its parsed JSON value; blank,
            missing or invalid files map to {} (parse errors are logged).
        """
        contents = await self.read_multiple_files(file_paths)
        results = {}

        for file_path, content in contents.items():
            if not content.strip():
                results[file_path] = {}
                continue
            try:
                results[file_path] = json.loads(content)
            except json.JSONDecodeError as e:
                logger.error(f"Error parsing JSON from {file_path}: {e}")
                results[file_path] = {}

        return results
|
|
|
|
|
|
# Process-wide ParallelFileReader singleton; stays None until first requested.
_global_parallel_reader: Optional[ParallelFileReader] = None
# Guards creation of the singleton above.
_reader_lock = threading.Lock()
|
|
|
|
|
|
def get_global_parallel_reader() -> ParallelFileReader:
    """Return the shared parallel reader, creating it with defaults on first use."""
    global _global_parallel_reader
    # Fast path: already created, no need to take the lock.
    if _global_parallel_reader is not None:
        return _global_parallel_reader
    with _reader_lock:
        # Re-check under the lock in case another thread created it meanwhile.
        if _global_parallel_reader is None:
            _global_parallel_reader = ParallelFileReader()
    return _global_parallel_reader
|