cfg

2025-10-07 22:07:50 +08:00 · 2025-10-07 22:07:50 +08:00 · 531e6de69e
commit 531e6de69e
parent 9245864314
4 changed files with 31 additions and 11 deletions
--- a/ZIP_PROJECT_README.md
+++ b/ZIP_PROJECT_README.md
@@ -22,13 +22,33 @@
  "messages": [
    {
      "role": "user",
-      "content": "请分析项目中的数据文件"
+      "content": "HP Elite Mini 800 G9ってノートPC？"
    }
  ],
-  "model": "qwen3-next",
+  "stream": true,
  "model": "qwen/qwen3-next-80b-a3b-instruct",
  "api_key": "sk-or-v1-<YOUR_OPENROUTER_API_KEY>",
  "model_server": "https://openrouter.ai/api/v1",
-  "zip_url": "https://example.com/my-project.zip",
+  "zip_url": "http://127.0.0.1:8080/all_hp_product_spec_book2506.zip",
-  "stream": false
+  "extra_prompt": "## 其他说明\n1. 查询的设备类型为第一优先级，比如笔记本和台式机。\n2. 针对\"CPU处理器\"和\"GPU显卡\"的查询，因为命名方式多样性，查询优先级最低。\n3. 如果确实无法找到完全匹配的数据，根据用户要求，可接受性能更高(更低)的CPU处理器和GPU显卡作为替代。",
  "generate_cfg": {
        # This parameter will affect the tool-call parsing logic. Default is False:
          # Set to True: when content is `<think>this is the thought</think>this is the answer`
          # Set to False: when response consists of reasoning_content and content
        # 'thought_in_content': True,
        # tool-call template: default is nous (recommended for qwen3):
        # 'fncall_prompt_type': 'nous',
        # Maximum input length, messages will be truncated if they exceed this length, please adjust according to model API:
        # 'max_input_tokens': 58000,
        # Parameters that will be passed directly to the model API, such as top_p, enable_thinking, etc., according to the API specifications:
        # 'top_p': 0.8,
        # Using the API's native tool call interface
        # 'use_raw_api': True,
    }
 }
 ```
--- a/pycache/gbase_agent.cpython-312.pyc
+++ b/pycache/gbase_agent.cpython-312.pyc
--- a/fastapi_app.py
+++ b/fastapi_app.py
@@ -1,7 +1,7 @@
 import json
 import os
 from typing import AsyncGenerator, Dict, List, Optional, Union
 from contextlib import asynccontextmanager
 from typing import AsyncGenerator, Dict, List, Optional, Union
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, HTTPException
@@ -88,9 +88,8 @@ class ChatRequest(BaseModel):
    api_key: Optional[str] = None
    model_server: Optional[str] = None
    zip_url: Optional[str] = None
-    extra: Optional[Dict] = None
+    generate_cfg: Optional[Dict] = None
    stream: Optional[bool] = False
    file_url: Optional[str] = None
    extra_prompt: Optional[str] = None
@@ -202,7 +201,7 @@ async def chat_completions(request: ChatRequest):
        agent = await get_agent_from_pool(timeout=30.0)
        # Dynamically set the model for this request; api_key, model_server and generate_cfg can be supplied through the API
-        update_agent_llm(agent, request.model, request.api_key,  request.model_server)
+        update_agent_llm(agent, request.model, request.api_key,  request.model_server, request.generate_cfg)
        extra_prompt = request.extra_prompt if request.extra_prompt else ""
        # 构建包含项目信息的消息上下文
--- a/gbase_agent.py
+++ b/gbase_agent.py
@@ -127,14 +127,15 @@ def init_agent_service_universal():
    return bot
-def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None):
+def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None,generate_cfg: Dict = None):
    """动态更新助手实例的LLM，支持从接口传入参数"""
    # 获取基础配置
    llm_config =  {
        "model": model_name,
        "api_key": api_key,
-        "model_server": model_server
+        "model_server": model_server,
        "generate_cfg": generate_cfg if generate_cfg else {}
    }
    # 创建LLM实例，确保不是字典