#!/usr/bin/env python3
"""
Async file operations - provides efficient async file read/write functionality.

The module offers two helpers:

* ``AsyncFileCache`` - reads and writes text/JSON files without blocking the
  event loop and keeps recently used contents in a bounded, TTL-based cache.
* ``ParallelFileReader`` - reads many files concurrently on a thread pool.

Module-level ``async_*`` functions are thin wrappers around a lazily created
global ``AsyncFileCache``.
"""

import os
import json
import asyncio
import logging
from typing import Dict, List, Optional, Any, Tuple
from pathlib import Path
import weakref
import threading
import time
from concurrent.futures import ThreadPoolExecutor

try:
    import aiofiles
    import aiofiles.os
except ImportError:
    # aiofiles is only needed for the write path; when it is not installed
    # writes fall back to the cache's own thread pool.
    aiofiles = None

# Configure logger
logger = logging.getLogger('app')


class AsyncFileCache:
    """Async file cache manager.

    Cache bookkeeping (lookup, store, invalidate) is done in short synchronous
    sections that never ``await``; disk I/O always happens outside of them so
    reads of different files can overlap.
    """

    def __init__(self, cache_size: int = 1000, ttl: int = 300):
        """
        Initialize the file cache.

        Args:
            cache_size: Maximum number of cached files. A value <= 0 disables
                caching (files are always read from disk).
            ttl: Cache TTL (seconds)
        """
        self.cache_size = cache_size
        self.ttl = ttl
        # {abs_path: (content, stored_at, encoding)}. Entries are re-inserted
        # on every store, so dict order is oldest-stored first.
        self._cache: Dict[str, Tuple[str, float, str]] = {}
        # Bumped by every write/invalidation so an in-flight read can tell
        # that what it read from disk may already be out of date.
        self._generation = 0
        self._lock = asyncio.Lock()
        self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="async_file_io")

    def _lookup(self, abs_path: str, encoding: str) -> Optional[str]:
        """Return the cached content for *abs_path*, or None on a miss.

        An entry is a miss when it is older than ``ttl`` or was decoded with a
        different encoding than the one requested.
        """
        entry = self._cache.get(abs_path)
        if entry is None:
            return None
        content, stored_at, cached_encoding = entry
        if cached_encoding != encoding:
            return None
        if time.monotonic() - stored_at >= self.ttl:
            return None
        return content

    def _store(self, abs_path: str, content: str, encoding: str) -> None:
        """Insert or refresh a cache entry and evict the oldest ones if needed."""
        if self.cache_size <= 0:
            return
        # Pop first so the refreshed entry moves to the "newest" end.
        self._cache.pop(abs_path, None)
        self._cache[abs_path] = (content, time.monotonic(), encoding)
        while len(self._cache) > self.cache_size:
            del self._cache[next(iter(self._cache))]

    async def read_file(self, file_path: str, encoding: str = 'utf-8') -> str:
        """Async read file content with caching.

        Args:
            file_path: Path of the file to read (made absolute for caching).
            encoding: Text encoding used to decode the file.

        Returns:
            The file content, or "" when the file does not exist or cannot be
            read. Failures are logged (except a missing file) and never cached.
        """
        abs_path = os.path.abspath(file_path)

        async with self._lock:
            cached = self._lookup(abs_path, encoding)
            generation = self._generation
        if cached is not None:
            return cached

        # Disk I/O runs outside the lock so unrelated reads are not serialized.
        loop = asyncio.get_running_loop()
        try:
            content = await loop.run_in_executor(
                self._executor, self._read_text_file, abs_path, encoding
            )
        except FileNotFoundError:
            return ""
        except Exception as e:
            logger.error("Error reading file %s: %s", abs_path, e)
            return ""

        async with self._lock:
            # Skip caching if a write or invalidation happened while we were
            # reading: our content may predate it.
            if generation == self._generation:
                self._store(abs_path, content, encoding)
        return content

    def _read_text_file(self, file_path: str, encoding: str) -> str:
        """Read a text file synchronously in the thread pool.

        Raises:
            OSError, UnicodeError: propagated to the caller, which decides how
                to report them.
        """
        with open(file_path, 'r', encoding=encoding) as f:
            return f.read()

    def _write_text_file(self, file_path: str, content: str, encoding: str) -> None:
        """Create parent directories and write a text file synchronously."""
        dir_path = os.path.dirname(file_path)
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        with open(file_path, 'w', encoding=encoding) as f:
            f.write(content)

    async def read_json(self, file_path: str) -> Dict[str, Any]:
        """Async read a JSON file.

        Returns:
            The parsed JSON value, or {} when the file is missing, blank, or
            not valid JSON (parse errors are logged).
        """
        content = await self.read_file(file_path)
        if not content.strip():
            return {}

        try:
            return json.loads(content)
        except json.JSONDecodeError as e:
            logger.error("Error parsing JSON from %s: %s", file_path, e)
            return {}

    async def write_file(self, file_path: str, content: str, encoding: str = 'utf-8'):
        """Async write file content.

        Parent directories are created as needed, and the cache entry for the
        file is replaced with *content* once the write has finished.

        Raises:
            OSError: if the directory or file cannot be created/written.
        """
        abs_path = os.path.abspath(file_path)

        if aiofiles is not None:
            # Ensure directory exists
            dir_path = os.path.dirname(abs_path)
            if dir_path:
                await aiofiles.os.makedirs(dir_path, exist_ok=True)

            # Use aiofiles for async write
            async with aiofiles.open(abs_path, 'w', encoding=encoding) as f:
                await f.write(content)
        else:
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                self._executor, self._write_text_file, abs_path, content, encoding
            )

        # Update cache
        async with self._lock:
            self._generation += 1
            self._store(abs_path, content, encoding)

    async def write_json(self, file_path: str, data: Dict[str, Any], indent: int = 2):
        """Async write a JSON file (non-ASCII characters are kept as-is)."""
        content = json.dumps(data, ensure_ascii=False, indent=indent)
        await self.write_file(file_path, content)

    async def exists(self, file_path: str) -> bool:
        """Async check if a file exists."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self._executor, os.path.exists, file_path
        )

    async def getmtime(self, file_path: str) -> float:
        """Async get file modification time; returns 0.0 on OSError."""
        loop = asyncio.get_running_loop()
        try:
            return await loop.run_in_executor(
                self._executor, os.path.getmtime, file_path
            )
        except OSError:
            return 0.0

    def invalidate_cache(self, file_path: Optional[str] = None):
        """Invalidate cache entries.

        The invalidation takes effect immediately and works with or without a
        running event loop. Call it from the event-loop thread (or while no
        loop is using this cache).

        Args:
            file_path: File whose entry should be dropped; when omitted (or
                empty) the whole cache is cleared.
        """
        # Mutating directly is safe: no section that holds ``_lock`` awaits,
        # so nothing can be half-way through a cache update at this point.
        self._generation += 1
        if file_path:
            self._cache.pop(os.path.abspath(file_path), None)
        else:
            self._cache.clear()

    async def _invalidate_single(self, file_path: str):
        """Invalidate cache for a single file (expects an absolute path)."""
        async with self._lock:
            self._generation += 1
            self._cache.pop(file_path, None)

    async def _clear_all_cache(self):
        """Clear all cache entries."""
        async with self._lock:
            self._generation += 1
            self._cache.clear()

    def close(self, wait: bool = True) -> None:
        """Shut down the internal thread pool; the cache must not be used for I/O afterwards."""
        self._executor.shutdown(wait=wait)


# Global file cache instance
_global_file_cache: Optional[AsyncFileCache] = None
_cache_lock = threading.Lock()


def get_global_file_cache() -> AsyncFileCache:
    """Get the global file cache instance, creating it with defaults on first use."""
    global _global_file_cache
    if _global_file_cache is None:
        with _cache_lock:
            # Re-check under the lock: another thread may have created it.
            if _global_file_cache is None:
                _global_file_cache = AsyncFileCache()
    return _global_file_cache


def init_global_file_cache(cache_size: int = 1000, ttl: int = 300) -> AsyncFileCache:
    """Initialize the global file cache, replacing any existing instance."""
    global _global_file_cache
    with _cache_lock:
        _global_file_cache = AsyncFileCache(cache_size, ttl)
    return _global_file_cache


async def async_read_file(file_path: str, encoding: str = 'utf-8') -> str:
    """Convenience function: async read file."""
    cache = get_global_file_cache()
    return await cache.read_file(file_path, encoding)


async def async_read_json(file_path: str) -> Dict[str, Any]:
    """Convenience function: async read JSON file."""
    cache = get_global_file_cache()
    return await cache.read_json(file_path)


async def async_write_file(file_path: str, content: str, encoding: str = 'utf-8'):
    """Convenience function: async write file."""
    cache = get_global_file_cache()
    await cache.write_file(file_path, content, encoding)


async def async_write_json(file_path: str, data: Dict[str, Any], indent: int = 2):
    """Convenience function: async write JSON file."""
    cache = get_global_file_cache()
    await cache.write_json(file_path, data, indent)


async def async_file_exists(file_path: str) -> bool:
    """Convenience function: async check if file exists."""
    cache = get_global_file_cache()
    return await cache.exists(file_path)


async def async_get_file_mtime(file_path: str) -> float:
    """Convenience function: async get file modification time."""
    cache = get_global_file_cache()
    return await cache.getmtime(file_path)


class ParallelFileReader:
    """Parallel file reader."""

    def __init__(self, max_workers: int = 8):
        """
        Initialize the parallel reader.

        Args:
            max_workers: Maximum number of worker threads
        """
        self.max_workers = max_workers
        self._executor = ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="parallel_file_reader")

    async def read_multiple_files(self, file_paths: List[str], encoding: str = 'utf-8') -> Dict[str, str]:
        """Read multiple files in parallel.

        Args:
            file_paths: Paths to read; duplicates are read only once.
            encoding: Text encoding used to decode every file.

        Returns:
            A dict mapping each path to its content, in first-seen order.
            Missing files map to ""; files that fail to read also map to ""
            and the error is logged.
        """
        loop = asyncio.get_running_loop()
        unique_paths = list(dict.fromkeys(file_paths))

        # Submit everything up front, then wait for all reads together.
        outcomes = await asyncio.gather(
            *(
                loop.run_in_executor(
                    self._executor, self._read_text_file_sync, path, encoding
                )
                for path in unique_paths
            ),
            return_exceptions=True,
        )

        results: Dict[str, str] = {}
        for path, outcome in zip(unique_paths, outcomes):
            if isinstance(outcome, BaseException):
                logger.error("Error reading %s: %s", path, outcome)
                results[path] = ""
            else:
                results[path] = outcome
        return results

    def _read_text_file_sync(self, file_path: str, encoding: str) -> str:
        """Read a text file synchronously in the thread pool.

        Returns "" for a missing file; any other error propagates so the
        caller can log it.
        """
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except FileNotFoundError:
            return ""

    async def read_multiple_json(self, file_paths: List[str]) -> Dict[str, Dict[str, Any]]:
        """Read multiple JSON files in parallel.

        Returns:
            A dict mapping each path to its parsed JSON value, or {} when the
            file is missing, blank, unreadable, or not valid JSON.
        """
        contents = await self.read_multiple_files(file_paths)

        results = {}
        for file_path, content in contents.items():
            if not content.strip():
                results[file_path] = {}
                continue
            try:
                results[file_path] = json.loads(content)
            except json.JSONDecodeError as e:
                logger.error("Error parsing JSON from %s: %s", file_path, e)
                results[file_path] = {}
        return results

    def close(self, wait: bool = True) -> None:
        """Shut down the internal thread pool; the reader must not be used afterwards."""
        self._executor.shutdown(wait=wait)


# Global parallel reader instance
_global_parallel_reader: Optional[ParallelFileReader] = None
_reader_lock = threading.Lock()


def get_global_parallel_reader() -> ParallelFileReader:
    """Get the global parallel reader instance, creating it on first use."""
    global _global_parallel_reader
    if _global_parallel_reader is None:
        with _reader_lock:
            # Re-check under the lock: another thread may have created it.
            if _global_parallel_reader is None:
                _global_parallel_reader = ParallelFileReader()
    return _global_parallel_reader