diff --git a/ZIP_PROJECT_README.md b/ZIP_PROJECT_README.md
index 29c2246..047f563 100644
--- a/ZIP_PROJECT_README.md
+++ b/ZIP_PROJECT_README.md
@@ -22,13 +22,33 @@
     "messages": [
       {
         "role": "user",
-        "content": "Please analyze the data files in the project"
+        "content": "Is the HP Elite Mini 800 G9 a laptop?"
       }
     ],
-    "model": "qwen3-next",
+    "stream": true,
+    "model": "qwen/qwen3-next-80b-a3b-instruct",
+    "api_key": "sk-or-v1-...",
     "model_server": "https://openrouter.ai/api/v1",
-    "zip_url": "https://example.com/my-project.zip",
-    "stream": false
+    "zip_url": "http://127.0.0.1:8080/all_hp_product_spec_book2506.zip",
+    "extra_prompt": "## Additional notes\n1. The queried device type (e.g. laptop vs. desktop) has the highest priority.\n2. Queries on \"CPU\" and \"GPU\" have the lowest priority, because of the variety of naming schemes.\n3. If no exactly matching data can be found, a higher- (or lower-) performance CPU or GPU is acceptable as a substitute, per the user's request.",
+    "generate_cfg": {
+      # This parameter will affect the tool-call parsing logic. Default is False:
+      # Set to True: when content is `<think>this is the thought</think>this is the answer`
+      # Set to False: when the response consists of reasoning_content and content
+      # 'thought_in_content': True,
+
+      # Tool-call template: the default is nous (recommended for qwen3):
+      # 'fncall_prompt_type': 'nous',
+
+      # Maximum input length; messages that exceed it will be truncated. Adjust according to the model API:
+      # 'max_input_tokens': 58000,
+
+      # Parameters passed directly to the model API, such as top_p, enable_thinking, etc., per the API specification:
+      # 'top_p': 0.8,
+
+      # Use the API's native tool-call interface:
+      # 'use_raw_api': True,
+    }
   }
 ```
 
@@ -154,4 +174,4 @@ response = requests.post("http://localhost:8000/chat/completions", json={
 - **Caching**: file cache keyed by a hash of the URL
 - **Concurrency safety**: multiple concurrent requests are handled safely
 
-This feature implements minimal stateless project management: the user only supplies the model_server and zip_url parameters at the top level, and the system automatically handles model configuration, project-ID generation, download, extraction, and caching, greatly reducing the complexity of project management.
\ No newline at end of file
+This feature implements minimal stateless project management: the user only supplies the model_server and zip_url parameters at the top level, and the system automatically handles model configuration, project-ID generation, download, extraction, and caching, greatly reducing the complexity of project management.
diff --git a/__pycache__/gbase_agent.cpython-312.pyc b/__pycache__/gbase_agent.cpython-312.pyc
index e2649d8..86c5259 100644
Binary files a/__pycache__/gbase_agent.cpython-312.pyc and b/__pycache__/gbase_agent.cpython-312.pyc differ
diff --git a/fastapi_app.py b/fastapi_app.py
index 2d87f2e..8f5634a 100644
--- a/fastapi_app.py
+++ b/fastapi_app.py
@@ -1,7 +1,7 @@
 import json
 import os
-from typing import AsyncGenerator, Dict, List, Optional, Union
 from contextlib import asynccontextmanager
+from typing import AsyncGenerator, Dict, List, Optional, Union
 
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, HTTPException
@@ -88,9 +88,8 @@ class ChatRequest(BaseModel):
     api_key: Optional[str] = None
     model_server: Optional[str] = None
     zip_url: Optional[str] = None
-    extra: Optional[Dict] = None
+    generate_cfg: Optional[Dict] = None
     stream: Optional[bool] = False
-    file_url: Optional[str] = None
     extra_prompt: Optional[str] = None
 
 
@@ -202,7 +201,7 @@ async def chat_completions(request: ChatRequest):
         agent = await get_agent_from_pool(timeout=30.0)
 
         # Dynamically set the model for this request; api_key, model_server, and extra parameters may be passed in from the API
-        update_agent_llm(agent, request.model, request.api_key, request.model_server)
+        update_agent_llm(agent, request.model, request.api_key, request.model_server, request.generate_cfg)
 
         extra_prompt = request.extra_prompt if request.extra_prompt else ""
         # Build the message context that includes the project information
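The fastapi_app.py hunks above rename the pass-through field `extra` to `generate_cfg` and thread it into `update_agent_llm`. A minimal client sketch of the new request shape, assuming the service runs on localhost:8000 as in the README; the api_key, zip_url, and generate_cfg values are illustrative placeholders, not values required by the API:

```python
# Hypothetical client call exercising the renamed field (extra -> generate_cfg).
import requests

payload = {
    "messages": [{"role": "user", "content": "Is the HP Elite Mini 800 G9 a laptop?"}],
    "model": "qwen/qwen3-next-80b-a3b-instruct",
    "model_server": "https://openrouter.ai/api/v1",
    "api_key": "sk-or-v1-...",  # placeholder; never commit a real key
    "zip_url": "https://example.com/my-project.zip",  # placeholder archive URL
    "stream": False,
    # Forwarded as-is into the LLM config; keys follow the model API:
    "generate_cfg": {"top_p": 0.8, "max_input_tokens": 58000},
}

response = requests.post("http://localhost:8000/chat/completions", json=payload)
print(response.json())
```

Because `generate_cfg` defaults to `None` on `ChatRequest`, omitting it preserves the old behavior.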
diff --git a/gbase_agent.py b/gbase_agent.py
index 678ed35..93a1c1e 100644
--- a/gbase_agent.py
+++ b/gbase_agent.py
@@ -127,14 +127,15 @@
     return bot
 
 
-def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None):
+def update_agent_llm(agent, model_name: str, api_key: str = None, model_server: str = None, generate_cfg: Dict = None):
     """Dynamically update the agent instance's LLM, allowing parameters to be passed in from the API"""
     # Assemble the base configuration
     llm_config = {
         "model": model_name,
         "api_key": api_key,
-        "model_server": model_server
+        "model_server": model_server,
+        "generate_cfg": generate_cfg if generate_cfg else {}
     }
 
     # Create the LLM instance, making sure it is not a dict
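Two notes on the gbase_agent.py hunk: the new `generate_cfg: Dict = None` annotation assumes `from typing import Dict` already exists in gbase_agent.py, and the diff is truncated just before the LLM instance is constructed. As a sketch of where `llm_config` plausibly goes next, assuming the agent is built on qwen-agent's `get_chat_model` (illustrative only, not the file's actual code):

```python
# Illustrative only: how llm_config, including the new generate_cfg key,
# could become a concrete LLM object via qwen-agent.
from typing import Dict, Optional

from qwen_agent.llm import get_chat_model


def build_llm(model_name: str,
              api_key: Optional[str] = None,
              model_server: Optional[str] = None,
              generate_cfg: Optional[Dict] = None):
    llm_config = {
        "model": model_name,
        "api_key": api_key,
        "model_server": model_server,
        # Fall back to {} so downstream code can rely on the key existing:
        "generate_cfg": generate_cfg if generate_cfg else {},
    }
    # get_chat_model returns a chat-model instance rather than a plain dict,
    # matching the hunk's closing comment about "making sure it is not a dict".
    return get_chat_model(llm_config)
```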