cfg

parent 9245864314
commit 531e6de69e
@@ -22,13 +22,33 @@
   "messages": [
     {
       "role": "user",
-      "content": "请分析项目中的数据文件"
+      "content": "HP Elite Mini 800 G9ってノートPC?"
     }
   ],
-  "model": "qwen3-next",
+  "stream": true,
+  "model": "qwen/qwen3-next-80b-a3b-instruct",
+  "api_key": "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",
+  "model_server": "https://openrouter.ai/api/v1",
-  "zip_url": "https://example.com/my-project.zip",
-  "stream": false
+  "zip_url": "http://127.0.0.1:8080/all_hp_product_spec_book2506.zip",
+  "extra_prompt": "## 其他说明\n1. 查询的设备类型为第一优先级,比如笔记本和台式机。\n2. 针对\"CPU处理器\"和\"GPU显卡\"的查询,因为命名方式多样性,查询优先级最低。\n3. 如果确实无法找到完全匹配的数据,根据用户要求,可接受性能更高(更低)的CPU处理器和GPU显卡是作为代替。",
+  "generate_cfg": {
+    # This parameter affects the tool-call parsing logic. Default is False:
+    # set to True when the content looks like `<think>this is the thought</think>this is the answer`,
+    # set to False when the response consists of reasoning_content and content.
+    # 'thought_in_content': True,
+
+    # Tool-call template; the default is nous (recommended for qwen3):
+    # 'fncall_prompt_type': 'nous',
+
+    # Maximum input length; messages exceeding it are truncated. Adjust according to the model API:
+    # 'max_input_tokens': 58000,
+
+    # Parameters passed directly to the model API (top_p, enable_thinking, etc.), per the API's specification:
+    # 'top_p': 0.8,
+
+    # Use the API's native tool-call interface:
+    # 'use_raw_api': True,
+  }
 }
 ```
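To see the added fields in action, a client call exercising the new request shape might look like the sketch below. It assumes the server listens on localhost:8000 with the /chat/completions route that appears in the doc's own example in the next hunk; the payload values mirror the added lines above, and the placeholder key is yours to substitute.

```python
import requests

# A sketch of a client request using the new top-level fields
# (model_server, zip_url, extra_prompt, generate_cfg). Host and port
# are assumptions; the route comes from the doc's own example.
payload = {
    "messages": [{"role": "user", "content": "HP Elite Mini 800 G9ってノートPC?"}],
    "stream": False,
    "model": "qwen/qwen3-next-80b-a3b-instruct",
    "api_key": "sk-or-v1-...",  # your own OpenRouter key
    "model_server": "https://openrouter.ai/api/v1",
    "zip_url": "http://127.0.0.1:8080/all_hp_product_spec_book2506.zip",
    "generate_cfg": {"top_p": 0.8},  # forwarded to the model API
}

response = requests.post("http://localhost:8000/chat/completions", json=payload)
print(response.json())
```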
@@ -154,4 +174,4 @@ response = requests.post("http://localhost:8000/chat/completions", json={
 - **Caching**: file caching keyed by a hash of the URL
 - **Concurrency safety**: supports handling multiple concurrent requests
 
 This feature implements minimal, stateless project management: the user only supplies the model_server and zip_url parameters at the top level of the request, and the system automatically handles model configuration, project-ID generation, download, extraction, and caching, reducing the complexity of project management as far as possible.
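The "file caching keyed by a hash of the URL" bullet is easy to picture with a short sketch. Nothing below comes from the repo; the function name, cache directory, and hash choice are invented for illustration, assuming only that the cache key is derived from the zip URL.

```python
import hashlib
import os
import urllib.request

CACHE_DIR = "/tmp/zip_cache"  # hypothetical cache location

def fetch_project_zip(zip_url: str) -> str:
    """Download zip_url at most once, keyed by a hash of the URL."""
    key = hashlib.sha256(zip_url.encode("utf-8")).hexdigest()
    path = os.path.join(CACHE_DIR, f"{key}.zip")
    if not os.path.exists(path):  # cache miss: download and store
        os.makedirs(CACHE_DIR, exist_ok=True)
        urllib.request.urlretrieve(zip_url, path)
    return path  # cache hit: reuse the previously downloaded file
```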
Binary file not shown.
@@ -1,7 +1,7 @@
 import json
 import os
-from typing import AsyncGenerator, Dict, List, Optional, Union
 from contextlib import asynccontextmanager
+from typing import AsyncGenerator, Dict, List, Optional, Union
 
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, HTTPException
@@ -88,9 +88,8 @@ class ChatRequest(BaseModel):
     api_key: Optional[str] = None
     model_server: Optional[str] = None
     zip_url: Optional[str] = None
-    extra: Optional[Dict] = None
+    generate_cfg: Optional[Dict] = None
     stream: Optional[bool] = False
     file_url: Optional[str] = None
     extra_prompt: Optional[str] = None
-
 
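Because the new generate_cfg field is typed as Optional[Dict], pydantic accepts any JSON object there without further validation. A quick check of the schema might look like this; the field names are taken from the hunk above, while the other required fields of ChatRequest (such as messages and model) are not shown in the diff, so their exact definitions are an assumption.

```python
# A hypothetical sanity check of the new schema, not repo code.
req = ChatRequest(
    messages=[{"role": "user", "content": "hi"}],
    model="qwen/qwen3-next-80b-a3b-instruct",
    generate_cfg={"top_p": 0.8, "max_input_tokens": 58000},  # free-form dict
)
print(req.generate_cfg)  # {'top_p': 0.8, 'max_input_tokens': 58000}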
@@ -202,7 +201,7 @@ async def chat_completions(request: ChatRequest):
     agent = await get_agent_from_pool(timeout=30.0)
 
     # Dynamically set the model for this request; api_key, model_server, and extra parameters can be passed in through the API
-    update_agent_llm(agent, request.model, request.api_key, request.model_server)
+    update_agent_llm(agent, request.model, request.api_key, request.model_server, request.generate_cfg)
 
     extra_prompt = request.extra_prompt if request.extra_prompt else ""
     # Build a message context that includes the project information
@@ -127,14 +127,15 @@ def init_agent_service_universal():
     return bot
 
 
-def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None):
+def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None, generate_cfg: Dict = None):
     """Dynamically update the assistant instance's LLM, supporting parameters passed in through the API."""
 
     # Assemble the base configuration
     llm_config = {
         "model": model_name,
         "api_key": api_key,
-        "model_server": model_server
+        "model_server": model_server,
+        "generate_cfg": generate_cfg if generate_cfg else {}
     }
 
     # Create the LLM instance, making sure it is not a dict
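The hunk ends just before the line that actually builds the LLM. If this server is built on Qwen-Agent, which the generate_cfg, model_server, and fncall_prompt_type keys all suggest, the step following the final comment plausibly resembles the sketch below; treat it as a guess consistent with the comment, not the repo's actual code.

```python
from qwen_agent.llm import get_chat_model

# Turn the plain config dict into a concrete LLM object before
# assigning it, so that agent.llm is an instance rather than a dict.
agent.llm = get_chat_model(llm_config)
```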