qwen_agent/utils/async_file_ops.py
朱潮 425f3c5bb4 chore: replace Chinese comments and log messages with English
Convert all Chinese comments, docstrings, logger/print output,
HTTPException detail messages, and API response messages to English
across the entire codebase. Functional zh/ja localized strings
(e.g. prompt templates, timezone display names, date formats) are
preserved as-is.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-30 19:45:35 +08:00

297 lines
9.8 KiB
Python

#!/usr/bin/env python3
"""
Async file operations - provides efficient async file read/write functionality.
"""
import os
import json
import asyncio
import aiofiles
import aiofiles.os
import logging
from typing import Dict, List, Optional, Any
from pathlib import Path
import weakref
import threading
import time
from concurrent.futures import ThreadPoolExecutor
# Module-level logger. Everything in this file logs through the logger named
# 'app' rather than a per-module (__name__) logger.
logger = logging.getLogger('app')
class AsyncFileCache:
    """Async file reader/writer with an in-memory, TTL-bounded content cache.

    Reads and metadata queries run on a private thread pool; writes go through
    ``aiofiles``. Cache entries are keyed by absolute path and are considered
    fresh for ``ttl`` seconds after they were stored. When the cache is full,
    the entry with the oldest store time is evicted (cache hits do not refresh
    that time, so this is oldest-stored eviction rather than true LRU).
    """

    def __init__(self, cache_size: int = 1000, ttl: int = 300):
        """
        Initialize the file cache.

        Args:
            cache_size: Maximum number of cached files. A value <= 0 disables
                caching entirely (reads and writes still work).
            ttl: Cache TTL (seconds)
        """
        self.cache_size = cache_size
        self.ttl = ttl
        self._cache = {}  # {abs_path: (content, timestamp)}
        self._lock = asyncio.Lock()
        self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="async_file_io")
        # Strong references to in-flight invalidation tasks. The event loop
        # only keeps weak references to tasks, so without this a scheduled
        # invalidation could be garbage-collected before it runs.
        self._pending_tasks = set()

    def _store(self, abs_path: str, content: str) -> None:
        """Insert or refresh a cache entry, keeping the cache within ``cache_size``.

        Must be called with ``self._lock`` held (or when no event loop is
        using the cache).
        """
        if self.cache_size <= 0:
            # Caching disabled: there is nothing to evict and nothing to keep.
            return
        if abs_path not in self._cache:
            # Only a *new* key needs room; refreshing an existing key must not
            # push out an unrelated entry.
            while len(self._cache) >= self.cache_size:
                oldest_key = min(self._cache, key=lambda k: self._cache[k][1])
                del self._cache[oldest_key]
        self._cache[abs_path] = (content, time.time())

    async def read_file(self, file_path: str, encoding: str = 'utf-8') -> str:
        """Async read file content with caching.

        Args:
            file_path: Path of the file to read (resolved to an absolute path).
            encoding: Text encoding used to decode the file.

        Returns:
            The file content, or "" when the file does not exist or cannot be
            read. Failed reads are logged and are never cached.
        """
        abs_path = os.path.abspath(file_path)
        # The lock is held for the whole read so that concurrent callers asking
        # for the same uncached path wait for one disk read and then hit the
        # cache instead of each reading the file.
        async with self._lock:
            cached = self._cache.get(abs_path)
            if cached is not None:
                content, timestamp = cached
                if time.time() - timestamp < self.ttl:
                    return content
            # Use thread pool for async file read
            loop = asyncio.get_running_loop()
            try:
                exists = await loop.run_in_executor(
                    self._executor, os.path.exists, abs_path
                )
                if not exists:
                    return ""
                content = await loop.run_in_executor(
                    self._executor, self._read_text_file, abs_path, encoding
                )
            except Exception as e:
                logger.error(f"Error reading file {abs_path}: {e}")
                return ""
            self._store(abs_path, content)
            return content

    def _read_text_file(self, file_path: str, encoding: str) -> str:
        """Read a text file synchronously in the thread pool.

        Errors propagate to the caller (``read_file``), which logs them and
        avoids caching an empty result for a file that merely failed to read.
        """
        with open(file_path, 'r', encoding=encoding) as f:
            return f.read()

    async def read_json(self, file_path: str) -> Dict[str, Any]:
        """Async read a JSON file.

        Returns:
            The parsed JSON value, or {} when the file is missing, empty,
            whitespace-only, or not valid JSON (the parse error is logged).
        """
        content = await self.read_file(file_path)
        if not content.strip():
            return {}
        try:
            return json.loads(content)
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON from {file_path}: {e}")
            return {}

    async def write_file(self, file_path: str, content: str, encoding: str = 'utf-8'):
        """Async write file content.

        Creates the parent directory if needed, writes ``content`` and then
        stores it in the cache so a following read does not touch the disk.
        """
        abs_path = os.path.abspath(file_path)
        # Ensure directory exists
        dir_path = os.path.dirname(abs_path)
        if dir_path:
            await aiofiles.os.makedirs(dir_path, exist_ok=True)
        # Write to the same absolute path that is used for the directory and
        # the cache key, so all three always refer to one file.
        async with aiofiles.open(abs_path, 'w', encoding=encoding) as f:
            await f.write(content)
        # Update cache (bounded by cache_size, same as reads)
        async with self._lock:
            self._store(abs_path, content)

    async def write_json(self, file_path: str, data: Dict[str, Any], indent: int = 2):
        """Async write a JSON file (non-ASCII characters are written as-is)."""
        content = json.dumps(data, ensure_ascii=False, indent=indent)
        await self.write_file(file_path, content)

    async def exists(self, file_path: str) -> bool:
        """Async check if a file exists."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self._executor, os.path.exists, file_path
        )

    async def getmtime(self, file_path: str) -> float:
        """Async get file modification time; returns 0.0 if it cannot be read."""
        loop = asyncio.get_running_loop()
        try:
            return await loop.run_in_executor(
                self._executor, os.path.getmtime, file_path
            )
        except OSError:
            return 0.0

    def invalidate_cache(self, file_path: Optional[str] = None):
        """Invalidate cache entries.

        Args:
            file_path: Path whose entry should be dropped. When omitted (or
                empty), the whole cache is cleared.

        When called from inside a running event loop the invalidation is
        scheduled as a task that takes the cache lock, so it is applied after
        any read currently in progress. When no loop is running in the calling
        thread, the entry is removed immediately instead of raising.
        """
        abs_path = os.path.abspath(file_path) if file_path else None
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop in this thread: nothing can be holding the
            # asyncio lock from here, so mutate the dict directly.
            if abs_path is None:
                self._cache.clear()
            else:
                self._cache.pop(abs_path, None)
            return
        if abs_path is None:
            task = loop.create_task(self._clear_all_cache())
        else:
            task = loop.create_task(self._invalidate_single(abs_path))
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)

    async def _invalidate_single(self, file_path: str):
        """Invalidate cache for a single file."""
        async with self._lock:
            self._cache.pop(file_path, None)

    async def _clear_all_cache(self):
        """Clear all cache entries."""
        async with self._lock:
            self._cache.clear()
# Process-wide AsyncFileCache instance. Starts as None and is assigned by
# get_global_file_cache() / init_global_file_cache().
_global_file_cache: Optional[AsyncFileCache] = None
# Thread lock taken while the global instance is created or replaced.
_cache_lock = threading.Lock()
def get_global_file_cache() -> AsyncFileCache:
    """Return the shared AsyncFileCache, creating it with defaults on first use."""
    global _global_file_cache
    # Fast path: already created, no need to take the lock.
    if _global_file_cache is not None:
        return _global_file_cache
    with _cache_lock:
        # Re-check under the lock; another thread may have created it meanwhile.
        if _global_file_cache is None:
            _global_file_cache = AsyncFileCache()
    return _global_file_cache
def init_global_file_cache(cache_size: int = 1000, ttl: int = 300) -> AsyncFileCache:
    """Replace the shared AsyncFileCache with a new one and return it.

    Args:
        cache_size: Maximum number of cached files for the new instance.
        ttl: Cache TTL (seconds) for the new instance.
    """
    global _global_file_cache
    with _cache_lock:
        fresh_cache = AsyncFileCache(cache_size, ttl)
        _global_file_cache = fresh_cache
        return fresh_cache
async def async_read_file(file_path: str, encoding: str = 'utf-8') -> str:
    """Read a text file through the shared global file cache."""
    return await get_global_file_cache().read_file(file_path, encoding)
async def async_read_json(file_path: str) -> Dict[str, Any]:
    """Read and parse a JSON file through the shared global file cache."""
    return await get_global_file_cache().read_json(file_path)
async def async_write_file(file_path: str, content: str, encoding: str = 'utf-8'):
    """Write a text file through the shared global file cache."""
    await get_global_file_cache().write_file(file_path, content, encoding)
async def async_write_json(file_path: str, data: Dict[str, Any], indent: int = 2):
    """Serialize data as JSON and write it through the shared global file cache."""
    await get_global_file_cache().write_json(file_path, data, indent)
async def async_file_exists(file_path: str) -> bool:
    """Check whether a path exists, using the shared global file cache's thread pool."""
    return await get_global_file_cache().exists(file_path)
async def async_get_file_mtime(file_path: str) -> float:
    """Get a file's modification time via the shared global file cache."""
    return await get_global_file_cache().getmtime(file_path)
class ParallelFileReader:
    """Read many files concurrently on a dedicated thread pool."""

    def __init__(self, max_workers: int = 8):
        """
        Initialize the parallel reader.

        Args:
            max_workers: Maximum number of worker threads
        """
        self.max_workers = max_workers
        self._executor = ThreadPoolExecutor(max_workers=max_workers,
                                            thread_name_prefix="parallel_file_reader")

    async def read_multiple_files(self, file_paths: List[str],
                                  encoding: str = 'utf-8') -> Dict[str, str]:
        """Read multiple files in parallel.

        Args:
            file_paths: Paths to read.
            encoding: Text encoding used to decode every file.

        Returns:
            A dict mapping each given path to its content. Missing files map
            to "". Files that fail to read (permissions, decode errors, ...)
            also map to "" and the failure is logged.
        """
        loop = asyncio.get_running_loop()
        # Submit every read up front so they run concurrently in the pool;
        # the awaits below only collect results.
        pending = [
            (
                file_path,
                loop.run_in_executor(
                    self._executor, self._read_text_file_sync, file_path, encoding
                ),
            )
            for file_path in file_paths
        ]
        results = {}
        for file_path, future in pending:
            try:
                results[file_path] = await future
            except Exception as e:
                logger.error(f"Error reading {file_path}: {e}")
                results[file_path] = ""
        return results

    def _read_text_file_sync(self, file_path: str, encoding: str) -> str:
        """Read a text file synchronously in the thread pool.

        Returns "" for a file that does not exist. Any other error is raised
        so that ``read_multiple_files`` can log it instead of it being
        silently discarded.
        """
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except FileNotFoundError:
            # Opening directly (instead of checking os.path.exists first)
            # avoids a race with the file being removed in between.
            return ""

    async def read_multiple_json(self, file_paths: List[str]) -> Dict[str, Dict[str, Any]]:
        """Read multiple JSON files in parallel.

        Returns:
            A dict mapping each given path to its parsed JSON value. Paths
            whose content is empty, whitespace-only, unreadable or invalid
            JSON map to {} (invalid JSON is logged).
        """
        contents = await self.read_multiple_files(file_paths)
        results = {}
        for file_path, content in contents.items():
            if not content.strip():
                results[file_path] = {}
                continue
            try:
                results[file_path] = json.loads(content)
            except json.JSONDecodeError as e:
                logger.error(f"Error parsing JSON from {file_path}: {e}")
                results[file_path] = {}
        return results
# Process-wide ParallelFileReader instance. Starts as None and is created on
# first call to get_global_parallel_reader().
_global_parallel_reader: Optional[ParallelFileReader] = None
# Thread lock taken while the global reader is created.
_reader_lock = threading.Lock()
def get_global_parallel_reader() -> ParallelFileReader:
    """Return the shared ParallelFileReader, creating it with defaults on first use."""
    global _global_parallel_reader
    # Fast path: already created, no need to take the lock.
    if _global_parallel_reader is not None:
        return _global_parallel_reader
    with _reader_lock:
        # Re-check under the lock; another thread may have created it meanwhile.
        if _global_parallel_reader is None:
            _global_parallel_reader = ParallelFileReader()
    return _global_parallel_reader