This commit is contained in:
朱潮 2025-10-07 22:07:50 +08:00
parent 9245864314
commit 531e6de69e
4 changed files with 31 additions and 11 deletions

View File

@ -22,13 +22,33 @@
"messages": [
{
"role": "user",
"content": "请分析项目中的数据文件"
"content": "HP Elite Mini 800 G9ってノートPC"
}
],
"model": "qwen3-next",
"stream": true,
"model": "qwen/qwen3-next-80b-a3b-instruct",
"api_key": "sk-or-v1-YOUR_OPENROUTER_API_KEY",
"model_server": "https://openrouter.ai/api/v1",
"zip_url": "https://example.com/my-project.zip",
"stream": false
"zip_url": "http://127.0.0.1:8080/all_hp_product_spec_book2506.zip",
"extra_prompt": "## 其他说明\n1. 查询的设备类型为第一优先级,比如笔记本和台式机。\n2. 针对\"CPU处理器\"和\"GPU显卡\"的查询,因为命名方式多样性,查询优先级最低。\n3. 如果确实无法找到完全匹配的数据,根据用户要求,可接受性能更高(更低)的CPU处理器和GPU显卡是作为代替。",
"generate_cfg": {
# This parameter will affect the tool-call parsing logic. Default is False:
# Set to True: when content is `<think>this is the thought</think>this is the answer`
# Set to False: when response consists of reasoning_content and content
# 'thought_in_content': True,
# tool-call template: default is nous (recommended for qwen3):
# 'fncall_prompt_type': 'nous',
# Maximum input length, messages will be truncated if they exceed this length, please adjust according to model API:
# 'max_input_tokens': 58000,
# Parameters that will be passed directly to the model API, such as top_p, enable_thinking, etc., according to the API specifications:
# 'top_p': 0.8,
# Using the API's native tool call interface
# 'use_raw_api': True,
}
}
```
@ -154,4 +174,4 @@ response = requests.post("http://localhost:8000/chat/completions", json={
- **缓存**: 基于URL哈希的文件缓存
- **并发安全**: 支持多并发请求处理
这个功能实现了极简的无状态项目管理:用户只需在最外层提供model_server和zip_url参数,系统会自动处理模型配置、项目标识生成、下载、解压和缓存,最大程度简化了项目管理的复杂度。
这个功能实现了极简的无状态项目管理:用户只需在最外层提供model_server和zip_url参数,系统会自动处理模型配置、项目标识生成、下载、解压和缓存,最大程度简化了项目管理的复杂度。

View File

@ -1,7 +1,7 @@
import json
import os
from typing import AsyncGenerator, Dict, List, Optional, Union
from contextlib import asynccontextmanager
from typing import AsyncGenerator, Dict, List, Optional, Union
import uvicorn
from fastapi import BackgroundTasks, FastAPI, HTTPException
@ -88,9 +88,8 @@ class ChatRequest(BaseModel):
api_key: Optional[str] = None
model_server: Optional[str] = None
zip_url: Optional[str] = None
extra: Optional[Dict] = None
generate_cfg: Optional[Dict] = None
stream: Optional[bool] = False
file_url: Optional[str] = None
extra_prompt: Optional[str] = None
@ -202,7 +201,7 @@ async def chat_completions(request: ChatRequest):
agent = await get_agent_from_pool(timeout=30.0)
# 动态设置请求的模型支持从接口传入api_key、model_server和extra参数
update_agent_llm(agent, request.model, request.api_key, request.model_server)
update_agent_llm(agent, request.model, request.api_key, request.model_server, request.generate_cfg)
extra_prompt = request.extra_prompt if request.extra_prompt else ""
# 构建包含项目信息的消息上下文

View File

@ -127,14 +127,15 @@ def init_agent_service_universal():
return bot
def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None):
def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None,generate_cfg: Dict = None):
"""动态更新助手实例的LLM支持从接口传入参数"""
# 获取基础配置
llm_config = {
"model": model_name,
"api_key": api_key,
"model_server": model_server
"model_server": model_server,
"generate_cfg": generate_cfg if generate_cfg else {}
}
# 创建LLM实例确保不是字典