修改balance资源配置
This commit is contained in:
parent
06102559ef
commit
bf11975183
@ -496,7 +496,7 @@ MODEL_SERVER=https://openrouter.ai/api/v1
|
||||
API_KEY=your-api-key
|
||||
|
||||
# 队列配置
|
||||
AGENT_CACHE_MAX_SIZE=20
|
||||
TOOL_CACHE_MAX_SIZE=20
|
||||
|
||||
# 其他配置
|
||||
TOKENIZERS_PARALLELISM=false
|
||||
|
||||
@ -12,7 +12,7 @@ from collections import OrderedDict
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import cachetools
|
||||
from utils.settings import AGENT_CACHE_MAX_SIZE, AGENT_CACHE_TTL, AGENT_CACHE_AUTO_RENEW
|
||||
from utils.settings import TOOL_CACHE_MAX_SIZE, TOOL_CACHE_TTL, TOOL_CACHE_AUTO_RENEW
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
@ -371,9 +371,9 @@ def get_memory_cache_manager() -> AgentMemoryCacheManager:
|
||||
if _global_cache_manager is None:
|
||||
# 从 settings 导入配置
|
||||
_global_cache_manager = AgentMemoryCacheManager(
|
||||
max_size=AGENT_CACHE_MAX_SIZE,
|
||||
default_ttl=AGENT_CACHE_TTL,
|
||||
auto_renew=AGENT_CACHE_AUTO_RENEW
|
||||
max_size=TOOL_CACHE_MAX_SIZE,
|
||||
default_ttl=TOOL_CACHE_TTL,
|
||||
auto_renew=TOOL_CACHE_AUTO_RENEW
|
||||
)
|
||||
|
||||
return _global_cache_manager
|
||||
@ -19,15 +19,15 @@ Agent 内存缓存系统使用 cachetools 库在内存中缓存 Agent 实例,
|
||||
|
||||
可以通过以下环境变量配置缓存行为:
|
||||
|
||||
- `AGENT_CACHE_MAX_SIZE`:最大缓存项数(默认:`1000`)
|
||||
- `AGENT_CACHE_TTL`:默认过期时间,秒(默认:`180` = 3分钟)
|
||||
- `AGENT_CACHE_AUTO_RENEW`:是否自动续期(默认:`true`)
|
||||
- `TOOL_CACHE_MAX_SIZE`:最大缓存项数(默认:`20`)
|
||||
- `TOOL_CACHE_TTL`:默认过期时间,秒(默认:`180` = 3分钟)
|
||||
- `TOOL_CACHE_AUTO_RENEW`:是否自动续期(默认:`true`)
|
||||
|
||||
示例:
|
||||
```bash
|
||||
export AGENT_CACHE_MAX_SIZE="500"
|
||||
export AGENT_CACHE_TTL="300" # 5分钟
|
||||
export AGENT_CACHE_AUTO_RENEW="false"
|
||||
export TOOL_CACHE_MAX_SIZE="500"
|
||||
export TOOL_CACHE_TTL="300" # 5分钟
|
||||
export TOOL_CACHE_AUTO_RENEW="false"
|
||||
```
|
||||
|
||||
### 代码配置
|
||||
|
||||
@ -65,7 +65,7 @@
|
||||
### 环境变量配置
|
||||
```bash
|
||||
# Agent缓存配置
|
||||
export AGENT_CACHE_MAX_SIZE=50
|
||||
export TOOL_CACHE_MAX_SIZE=50
|
||||
|
||||
# Tokenizer优化
|
||||
export TOKENIZERS_PARALLELISM=true
|
||||
|
||||
@ -62,17 +62,17 @@ setup_environment() {
|
||||
case $PROFILE in
|
||||
"low_memory")
|
||||
export TOKENIZERS_PARALLELISM=false
|
||||
export AGENT_CACHE_MAX_SIZE=20
|
||||
export TOOL_CACHE_MAX_SIZE=20
|
||||
;;
|
||||
"balanced")
|
||||
export TOKENIZERS_PARALLELISM=true
|
||||
export TOKENIZERS_FAST=1
|
||||
export AGENT_CACHE_MAX_SIZE=50
|
||||
export TOOL_CACHE_MAX_SIZE=50
|
||||
;;
|
||||
"high_performance")
|
||||
export TOKENIZERS_PARALLELISM=true
|
||||
export TOKENIZERS_FAST=1
|
||||
export AGENT_CACHE_MAX_SIZE=100
|
||||
export TOOL_CACHE_MAX_SIZE=100
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
@ -226,19 +226,19 @@ class ProcessManager:
|
||||
if args.profile == "low_memory":
|
||||
env_vars = {
|
||||
'TOKENIZERS_PARALLELISM': 'false',
|
||||
'AGENT_CACHE_MAX_SIZE': '20',
|
||||
'TOOL_CACHE_MAX_SIZE': '10',
|
||||
}
|
||||
elif args.profile == "balanced":
|
||||
env_vars = {
|
||||
'TOKENIZERS_PARALLELISM': 'true',
|
||||
'TOKENIZERS_FAST': '1',
|
||||
'AGENT_CACHE_MAX_SIZE': '50',
|
||||
'TOOL_CACHE_MAX_SIZE': '20',
|
||||
}
|
||||
elif args.profile == "high_performance":
|
||||
env_vars = {
|
||||
'TOKENIZERS_PARALLELISM': 'true',
|
||||
'TOKENIZERS_FAST': '1',
|
||||
'AGENT_CACHE_MAX_SIZE': '100',
|
||||
'TOOL_CACHE_MAX_SIZE': '50',
|
||||
}
|
||||
|
||||
# 通用优化
|
||||
@ -331,11 +331,11 @@ def main():
|
||||
if args.api_workers is None:
|
||||
cpu_count = multiprocessing.cpu_count()
|
||||
if args.profile == "low_memory":
|
||||
args.api_workers = min(2, cpu_count)
|
||||
args.api_workers = min(1, cpu_count)
|
||||
elif args.profile == "balanced":
|
||||
args.api_workers = min(4, cpu_count + 1)
|
||||
args.api_workers = min(2, cpu_count + 1)
|
||||
elif args.profile == "high_performance":
|
||||
args.api_workers = min(8, cpu_count * 2)
|
||||
args.api_workers = min(4, cpu_count * 2)
|
||||
|
||||
# 创建进程管理器并运行
|
||||
manager = ProcessManager()
|
||||
|
||||
@ -9,9 +9,9 @@ SUMMARIZATION_MAX_TOKENS = MAX_CONTEXT_TOKENS - MAX_OUTPUT_TOKENS - 1000
|
||||
SUMMARIZATION_MESSAGES_TO_KEEP = int(os.getenv("SUMMARIZATION_MESSAGES_TO_KEEP", 20))
|
||||
|
||||
# Agent Cache Settings
|
||||
AGENT_CACHE_MAX_SIZE = int(os.getenv("AGENT_CACHE_MAX_SIZE", 20))
|
||||
AGENT_CACHE_TTL = int(os.getenv("AGENT_CACHE_TTL", 180))
|
||||
AGENT_CACHE_AUTO_RENEW = os.getenv("AGENT_CACHE_AUTO_RENEW", "true") == "true"
|
||||
TOOL_CACHE_MAX_SIZE = int(os.getenv("TOOL_CACHE_MAX_SIZE", 20))
|
||||
TOOL_CACHE_TTL = int(os.getenv("TOOL_CACHE_TTL", 180))
|
||||
TOOL_CACHE_AUTO_RENEW = os.getenv("TOOL_CACHE_AUTO_RENEW", "true") == "true"
|
||||
|
||||
# API Settings
|
||||
BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
|
||||
@ -57,6 +57,6 @@ CHECKPOINT_BUSY_TIMEOUT = int(os.getenv("CHECKPOINT_BUSY_TIMEOUT", "10000"))
|
||||
|
||||
# 连接池大小
|
||||
# 同时可以持有的最大连接数
|
||||
CHECKPOINT_POOL_SIZE = int(os.getenv("CHECKPOINT_POOL_SIZE", "30"))
|
||||
CHECKPOINT_POOL_SIZE = int(os.getenv("CHECKPOINT_POOL_SIZE", "15"))
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user