# Source: qwen_agent/utils/system_optimizer.py
# Last modified: 2025-11-27 21:50:03 +08:00 (313 lines, 10 KiB, Python)
#
# Note: the hosting page flagged this file as containing ambiguous Unicode
# characters, i.e. characters that might be confused with other characters.

#!/usr/bin/env python3
"""
系统配置优化 - 调整系统参数以提高并发性能
"""
import os
import asyncio
import multiprocessing
import threading
import time
import resource
import logging
from typing import Dict, Any, Optional
from concurrent.futures import ThreadPoolExecutor
# Logger used by this module, obtained under the name 'app'.
logger = logging.getLogger('app')
class SystemOptimizer:
    """Tune process resource limits and environment variables for concurrency.

    ``optimize_system_settings`` raises the soft ``RLIMIT_NOFILE`` and
    ``RLIMIT_STACK`` limits toward fixed targets (it never lowers a limit and
    never exceeds the hard limit) and defines a set of environment variables
    that are not already present. ``restore_original_settings`` undoes exactly
    those changes.

    Attributes:
        original_settings: Snapshot taken before optimizing. Holds the
            ``(soft, hard)`` tuples under ``'RLIMIT_NOFILE'`` and
            ``'RLIMIT_STACK'`` plus the pre-existing values of the
            environment variables listed in ``_BACKED_UP_ENV_KEYS``.
        optimized: ``True`` after ``optimize_system_settings`` has run and
            until ``restore_original_settings`` is called.
    """

    # Targets for the soft limits; a lower hard limit always wins.
    _NOFILE_TARGET = 65536
    _STACK_TARGET = 8 * 1024 * 1024  # 8 MiB

    # Environment variables whose pre-existing values are snapshotted.
    _BACKED_UP_ENV_KEYS = (
        'TOKENIZERS_PARALLELISM',
        'PYTHONUNBUFFERED',
        'PYTHONDONTWRITEBYTECODE',
        'OMP_NUM_THREADS',
    )

    def __init__(self):
        self.original_settings: Dict[str, Any] = {}
        self.optimized = False
        # Variables created by this instance (name -> value written), so that
        # restore removes only what optimize added.
        self._applied_env: Dict[str, str] = {}

    def optimize_system_settings(self):
        """Apply the resource-limit and environment tuning once.

        Does nothing when the instance is already marked as optimized.
        Failures to change a resource limit are logged and do not abort the
        remaining steps.
        """
        if self.optimized:
            return

        # Snapshot the current state so it can be restored later.
        self._backup_original_settings()

        # 1. File descriptor limit.
        try:
            change = self._raise_soft_limit(
                'RLIMIT_NOFILE', resource.RLIMIT_NOFILE, self._NOFILE_TARGET)
        except (ValueError, OSError) as e:
            logger.error("无法设置文件描述符限制: %s", e)
        else:
            if change is not None:
                logger.info("文件描述符限制从 %s 增加到 %s", change[0], change[1])

        # 2. Stack size limit.
        try:
            change = self._raise_soft_limit(
                'RLIMIT_STACK', resource.RLIMIT_STACK, self._STACK_TARGET)
        except (ValueError, OSError) as e:
            logger.error("无法设置线程栈大小: %s", e)
        else:
            if change is not None:
                logger.info("线程栈大小设置为 %sMB", change[1] // (1024 * 1024))

        # 3. Environment variable defaults.
        self._apply_env_defaults()

        self.optimized = True
        logger.info("系统优化完成")

    @staticmethod
    def _soft_limit_for(soft, hard, target):
        """Return the soft limit to apply: raised toward ``target``, never lowered."""
        unlimited = resource.RLIM_INFINITY
        if soft == unlimited:
            # Already unlimited; nothing to raise.
            return soft
        # RLIM_INFINITY may be a negative sentinel, so it must not be fed to
        # min(): that would silently turn the soft limit into "unlimited".
        ceiling = target if hard == unlimited else min(target, hard)
        return max(soft, ceiling)

    def _raise_soft_limit(self, key, limit_id, target):
        """Raise one soft limit; return ``(old_soft, new_soft)`` or ``None`` if unchanged."""
        soft, hard = resource.getrlimit(limit_id)
        self.original_settings[key] = (soft, hard)
        new_soft = self._soft_limit_for(soft, hard, target)
        if new_soft == soft:
            return None
        resource.setrlimit(limit_id, (new_soft, hard))
        return soft, new_soft

    def _apply_env_defaults(self):
        """Define each default environment variable that is not already set."""
        # NOTE(review): several of these are presumably only honoured by child
        # processes (CPython reads PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE
        # at interpreter startup), and some names may not be read by any
        # library at all — confirm before relying on them.
        env_vars = {
            # Python runtime
            'PYTHONUNBUFFERED': '1',  # intent: disable output buffering
            'PYTHONDONTWRITEBYTECODE': '1',  # intent: do not write .pyc files
            # Tokenizers - allow a moderate degree of parallelism
            'TOKENIZERS_PARALLELISM': 'true',
            'TOKENIZERS_FAST': '1',  # intent: enable fast tokenizers
            # OpenMP
            'OMP_NUM_THREADS': str(min(8, multiprocessing.cpu_count())),  # cap OpenMP threads
            'OMP_WAIT_POLICY': 'PASSIVE',  # passive wait policy
            # Memory
            'MALLOC_TRIM_THRESHOLD_': '100000',  # malloc trim threshold
            # Network
            'TCP_NODELAY': '1',  # intent: disable Nagle's algorithm
            # Hugging Face
            'TRANSFORMERS_CACHE': '/tmp/transformers_cache',  # cache location
            'HF_OFFLINE': '0',  # intent: online mode
            # CUDA (when a GPU is used)
            'CUDA_LAUNCH_BLOCKING': '0',  # asynchronous CUDA launches
            # asyncio
            'UVLOOP_ENABLED': '1',  # intent: enable uvloop when available
        }
        for key, value in env_vars.items():
            if key not in os.environ:
                os.environ[key] = value
                self._applied_env[key] = value
                logger.info("设置环境变量: %s=%s", key, value)

    def _backup_original_settings(self):
        """Snapshot the current resource limits and selected environment variables."""
        try:
            self.original_settings['RLIMIT_NOFILE'] = resource.getrlimit(resource.RLIMIT_NOFILE)
            self.original_settings['RLIMIT_STACK'] = resource.getrlimit(resource.RLIMIT_STACK)
        except (ValueError, OSError) as e:
            logger.warning("无法备份资源限制: %s", e)

        for key in self._BACKED_UP_ENV_KEYS:
            if key in os.environ:
                self.original_settings[key] = os.environ[key]
            else:
                # Drop a stale value left over from an earlier optimize cycle.
                self.original_settings.pop(key, None)

    def restore_original_settings(self):
        """Undo the changes made by ``optimize_system_settings``.

        Resource limits are reset to the snapshotted ``(soft, hard)`` values.
        Environment variables that this instance created are removed (unless
        something else has changed them since), and snapshotted variables are
        put back to their original values. Returns immediately when there is
        nothing to restore.
        """
        if not self.original_settings and not self._applied_env:
            return

        logger.info("恢复原始系统设置...")

        # Resource limits.
        for key, limit_id, message in (
            ('RLIMIT_NOFILE', resource.RLIMIT_NOFILE, "恢复文件描述符限制"),
            ('RLIMIT_STACK', resource.RLIMIT_STACK, "恢复线程栈大小"),
        ):
            if key not in self.original_settings:
                continue
            try:
                resource.setrlimit(limit_id, self.original_settings[key])
            except (ValueError, OSError) as e:
                # Best effort: keep restoring the remaining settings.
                logger.warning("无法恢复 %s: %s", key, e)
            else:
                logger.info(message)

        # Remove only the variables this instance added.
        for key, value in self._applied_env.items():
            if os.environ.get(key) == value:
                del os.environ[key]
                logger.info("移除环境变量: %s", key)
        self._applied_env.clear()

        # Put snapshotted variables back if they were changed in the meantime.
        for key in self._BACKED_UP_ENV_KEYS:
            value = self.original_settings.get(key)
            if value is not None and os.environ.get(key) != value:
                os.environ[key] = value
                logger.info("恢复环境变量: %s=%s", key, value)

        self.optimized = False
        logger.info("系统设置已恢复")
class AsyncioOptimizer:
    """Static helpers for asyncio event-loop and Gunicorn configuration."""

    @staticmethod
    def setup_event_loop_policy():
        """Install uvloop's event loop policy when uvloop can be imported.

        When the import fails, the default policy is kept and only a log
        message is emitted. Afterwards a suggested thread-pool size (four
        threads per CPU core, capped at 32) is logged; no executor is created
        here.
        """
        try:
            # Optional dependency: only used when it is installed.
            import uvloop
            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
            logger.info("使用uvloop事件循环策略")
        except ImportError:
            logger.info("使用默认事件循环策略")

        # The default executor is not configured here because no event loop
        # is running yet; the size is only reported for use at app startup.
        cores = multiprocessing.cpu_count()
        thread_pool_size = min(cores * 4, 32)
        logger.info(f"建议线程池大小: {thread_pool_size}")

    @staticmethod
    def optimize_gunicorn_settings() -> Dict[str, Any]:
        """Return Gunicorn settings, with the worker count derived from the CPU count.

        Returns:
            A dict keyed by Gunicorn setting name. ``workers`` is
            ``cpu_count + 1`` capped at 8; every other value is a constant.
        """
        worker_count = min(8, multiprocessing.cpu_count() + 1)

        worker_section = {
            'workers': worker_count,  # number of worker processes
            'worker_class': 'uvicorn.workers.UvicornWorker',  # Uvicorn worker
            'worker_connections': 2000,  # connections per worker
            'max_requests': 5000,  # restart a worker after this many requests
            'max_requests_jitter': 500,  # random jitter
            'preload_app': True,  # preload the application
        }
        timeout_section = {
            'timeout': 120,  # worker timeout
            'keepalive': 5,  # Keep-Alive timeout
            'graceful_timeout': 30,  # graceful shutdown timeout
        }
        performance_section = {
            'worker_tmp_dir': '/dev/shm',  # in-memory filesystem
        }
        logging_section = {
            'accesslog': '-',  # standard output
            'errorlog': '-',  # standard error
            'loglevel': 'info',
        }

        return {
            **worker_section,
            **timeout_section,
            **performance_section,
            **logging_section,
        }
def setup_system_optimizations():
    """Apply the system-level and asyncio tuning in one call.

    Returns:
        The ``SystemOptimizer`` instance that applied the system settings.
    """
    # Step 1: resource limits and environment variables.
    optimizer = SystemOptimizer()
    optimizer.optimize_system_settings()

    # Step 2: event loop policy.
    AsyncioOptimizer().setup_event_loop_policy()

    return optimizer
def create_performance_monitor() -> Dict[str, Any]:
    """Return the static performance-monitoring configuration.

    Returns:
        A dict with three entries: ``monitor_interval`` (seconds),
        ``metrics`` (metric name -> enabled flag, all ``True``) and
        ``alerts`` (threshold name -> numeric threshold).
    """
    enabled_metrics = (
        'memory_usage',
        'cpu_usage',
        'disk_io',
        'network_io',
        'active_connections',
        'request_latency',
        'cache_hit_rate',
        'error_rate',
    )

    alert_thresholds = {
        'memory_threshold': 0.9,  # alert at 90% memory usage
        'cpu_threshold': 0.8,  # alert at 80% CPU usage
        'disk_threshold': 0.9,  # alert at 90% disk usage
        'error_threshold': 0.05,  # alert at 5% error rate
        'latency_threshold': 5.0,  # alert at 5 seconds latency
    }

    monitor_config: Dict[str, Any] = {'monitor_interval': 60}  # seconds
    monitor_config['metrics'] = dict.fromkeys(enabled_metrics, True)
    monitor_config['alerts'] = alert_thresholds
    return monitor_config
def get_optimized_worker_config(cpu_count: Optional[int] = None,
                                memory_gb: Optional[float] = None) -> Dict[str, Any]:
    """Return a worker configuration sized from CPU count and physical memory.

    Args:
        cpu_count: Number of CPUs to size for. Defaults to
            ``multiprocessing.cpu_count()``.
        memory_gb: Memory budget in GiB. Defaults to the physical memory
            reported by ``os.sysconf``.

    Returns:
        A dict with ``max_workers`` plus fixed connection, buffer and timeout
        settings. ``max_workers`` is the smallest of 16, ``max(2, cpu_count)``
        and one worker per 2 GiB of memory, but never less than 1.
    """
    if cpu_count is None:
        cpu_count = multiprocessing.cpu_count()
    if memory_gb is None:
        memory_gb = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') / (1024.0 ** 3)

    max_workers = min(
        16,  # absolute cap on workers
        max(2, cpu_count),  # CPU-based term, at least 2
        int(memory_gb / 2),  # memory-based cap: 2 GiB per worker
    )
    # With less than 2 GiB the memory term is 0; a configuration with zero
    # workers is unusable, so always keep at least one.
    max_workers = max(1, max_workers)

    return {
        'max_workers': max_workers,
        'worker_connections': 1000,  # connections per worker
        'connection_pool_size': 100,  # connection pool size
        'buffer_size': 8192,  # buffer size
        'timeout': 120,  # timeout
        'keepalive_timeout': 30,  # Keep-Alive timeout
    }
# Predefined optimization profiles, keyed by profile name. Each profile maps
# setting names to integer values.
OPTIMIZATION_CONFIGS = {
    'low_memory': dict(
        max_workers=2,
        worker_connections=500,
        buffer_size=4096,
        cache_size=500,
    ),
    'balanced': dict(
        max_workers=4,
        worker_connections=1000,
        buffer_size=8192,
        cache_size=1000,
    ),
    'high_performance': dict(
        max_workers=8,
        worker_connections=2000,
        buffer_size=16384,
        cache_size=2000,
    ),
}
def apply_optimization_profile(profile_name: str) -> Dict[str, Any]:
    """Return a copy of a predefined profile, annotated with host details.

    Args:
        profile_name: A key of ``OPTIMIZATION_CONFIGS``.

    Returns:
        A new dict holding the profile's settings plus ``profile_name``,
        ``applied_at`` (``time.time()`` at call time), ``cpu_count`` and
        ``memory_gb`` (physical memory in GiB as reported by ``os.sysconf``).

    Raises:
        ValueError: If ``profile_name`` is not a known profile.
    """
    if profile_name not in OPTIMIZATION_CONFIGS:
        raise ValueError(f"未知的优化配置: {profile_name}")

    profile_settings = OPTIMIZATION_CONFIGS[profile_name]

    # Host-specific details recorded alongside the profile.
    timestamp = time.time()
    cores = multiprocessing.cpu_count()
    page_size = os.sysconf('SC_PAGE_SIZE')
    page_count = os.sysconf('SC_PHYS_PAGES')

    return {
        **profile_settings,
        'profile_name': profile_name,
        'applied_at': timestamp,
        'cpu_count': cores,
        'memory_gb': page_size * page_count / (1024.0 ** 3),
    }
# Module-wide SystemOptimizer instance; stays None until first requested.
_global_system_optimizer: Optional[SystemOptimizer] = None


def get_global_system_optimizer() -> SystemOptimizer:
    """Return the module-wide ``SystemOptimizer``, creating it on first call."""
    global _global_system_optimizer
    if _global_system_optimizer is not None:
        return _global_system_optimizer
    _global_system_optimizer = SystemOptimizer()
    return _global_system_optimizer