This commit is contained in:
朱潮 2025-10-07 22:07:50 +08:00
parent 9245864314
commit 531e6de69e
4 changed files with 31 additions and 11 deletions

View File

@ -22,13 +22,33 @@
"messages": [
{
"role": "user",
"content": "请分析项目中的数据文件"
"content": "HP Elite Mini 800 G9ってノートPC"
}
],
"model": "qwen3-next",
"stream": true,
"model": "qwen/qwen3-next-80b-a3b-instruct",
"api_key": "sk-or-v1-YOUR_OPENROUTER_API_KEY",
"model_server": "https://openrouter.ai/api/v1",
"zip_url": "https://example.com/my-project.zip",
"stream": false
"zip_url": "http://127.0.0.1:8080/all_hp_product_spec_book2506.zip",
"extra_prompt": "## 其他说明\n1. 查询的设备类型为第一优先级,比如笔记本和台式机。\n2. 针对\"CPU处理器\"和\"GPU显卡\"的查询,因为命名方式多样性,查询优先级最低。\n3. 如果确实无法找到完全匹配的数据,根据用户要求,可接受性能更高(更低)的CPU处理器和GPU显卡是作为代替。",
"generate_cfg": {
# This parameter will affect the tool-call parsing logic. Default is False:
# Set to True: when content is `<think>this is the thought</think>this is the answer`
# Set to False: when response consists of reasoning_content and content
# 'thought_in_content': True,
# tool-call template: default is nous (recommended for qwen3):
# 'fncall_prompt_type': 'nous',
# Maximum input length, messages will be truncated if they exceed this length, please adjust according to model API:
# 'max_input_tokens': 58000,
# Parameters that will be passed directly to the model API, such as top_p, enable_thinking, etc., according to the API specifications:
# 'top_p': 0.8,
# Using the API's native tool call interface
# 'use_raw_api': True,
}
}
```
@ -154,4 +174,4 @@ response = requests.post("http://localhost:8000/chat/completions", json={
- **缓存**: 基于URL哈希的文件缓存
- **并发安全**: 支持多并发请求处理
这个功能实现了极简的无状态项目管理:用户只需在最外层提供model_server和zip_url参数,系统会自动处理模型配置、项目标识生成、下载、解压和缓存,最大程度简化了项目管理的复杂度。
这个功能实现了极简的无状态项目管理:用户只需在最外层提供model_server和zip_url参数,系统会自动处理模型配置、项目标识生成、下载、解压和缓存,最大程度简化了项目管理的复杂度。

View File

@ -1,7 +1,7 @@
import json
import os
from typing import AsyncGenerator, Dict, List, Optional, Union
from contextlib import asynccontextmanager
from typing import AsyncGenerator, Dict, List, Optional, Union
import uvicorn
from fastapi import BackgroundTasks, FastAPI, HTTPException
@ -88,9 +88,8 @@ class ChatRequest(BaseModel):
api_key: Optional[str] = None
model_server: Optional[str] = None
zip_url: Optional[str] = None
extra: Optional[Dict] = None
generate_cfg: Optional[Dict] = None
stream: Optional[bool] = False
file_url: Optional[str] = None
extra_prompt: Optional[str] = None
@ -202,7 +201,7 @@ async def chat_completions(request: ChatRequest):
agent = await get_agent_from_pool(timeout=30.0)
# 动态设置请求的模型支持从接口传入api_key、model_server和extra参数
update_agent_llm(agent, request.model, request.api_key, request.model_server)
update_agent_llm(agent, request.model, request.api_key, request.model_server, request.generate_cfg)
extra_prompt = request.extra_prompt if request.extra_prompt else ""
# 构建包含项目信息的消息上下文

View File

@ -127,14 +127,15 @@ def init_agent_service_universal():
return bot
def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None):
def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None,generate_cfg: Dict = None):
"""动态更新助手实例的LLM支持从接口传入参数"""
# 获取基础配置
llm_config = {
"model": model_name,
"api_key": api_key,
"model_server": model_server
"model_server": model_server,
"generate_cfg": generate_cfg if generate_cfg else {}
}
# 创建LLM实例确保不是字典