From f18d96612399b77db0298a512b96c8aa8456edbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?=
Date: Sun, 7 Jun 2026 10:55:25 +0800
Subject: [PATCH] add /api/v3/llm/chat/completions

---
 routes/chat.py | 156 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 155 insertions(+), 1 deletion(-)

diff --git a/routes/chat.py b/routes/chat.py
index 9f98e47..b6b29e4 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -18,8 +18,10 @@ from utils.fastapi_utils import (
     process_messages, create_project_directory, extract_api_key_from_auth,
     generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
     call_preamble_llm,
-    create_stream_chunk
+    create_stream_chunk,
+    detect_provider, sanitize_model_kwargs
 )
+from langchain.chat_models import init_chat_model
 from langchain_core.messages import AIMessageChunk, ToolMessage, AIMessage, HumanMessage
 from utils.settings import MAX_OUTPUT_TOKENS
 from agent.agent_config import AgentConfig
@@ -968,6 +970,158 @@ async def chat_completions_v3(request: ChatRequestV3, authorization: Optional[st
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
 
+async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str] = None):
+    """Build a direct LLM client from a bot's database config.
+
+    Reuses the v3 config-loading chain to resolve model / api_key / model_server,
+    then constructs a LangChain chat model without any agent logic.
+
+    Args:
+        bot_id: Bot whose stored configuration supplies the LLM settings.
+        user_identifier: Optional identifier forwarded to the config lookup.
+
+    Returns:
+        tuple: (llm_instance, model_name)
+
+    Raises:
+        HTTPException: 400 when the bot config has no model configured.
+    """
+    bot_config = await fetch_bot_config_from_db(bot_id, user_identifier)
+
+    model_name = bot_config.get("model", "")
+    api_key = bot_config.get("api_key", "")
+    model_server = bot_config.get("model_server", "")
+
+    if not model_name:
+        raise HTTPException(status_code=400, detail=f"No model configured for bot '{bot_id}'")
+
+    # Detect provider and sanitize kwargs (same as the agent path)
+    model_provider, base_url = detect_provider(model_name, model_server)
+    model_kwargs, _, _ = sanitize_model_kwargs(
+        model_name=model_name,
+        model_provider=model_provider,
+        base_url=base_url,
+        api_key=api_key,
+        generate_cfg={},
+        source="llm_passthrough"
+    )
+
+    llm = init_chat_model(**model_kwargs)
+    return llm, model_name
+
+
+def _llm_content_to_text(content) -> str:
+    """Flatten LangChain message content into plain text.
+
+    Message content is either a string or a list of content blocks (plain
+    strings or dicts such as ``{"type": "text", "text": "..."}``). Only the
+    textual parts are joined, so an empty block list yields ``""`` rather
+    than the ``"[]"`` that ``str()`` would leak into the response.
+    """
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return str(content)
+    parts = []
+    for block in content:
+        if isinstance(block, str):
+            parts.append(block)
+        elif isinstance(block, dict) and block.get("type") == "text":
+            parts.append(str(block.get("text") or ""))
+    return "".join(parts)
+
+
+@router.post("/api/v3/llm/chat/completions")
+async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
+    """LLM passthrough API - direct LLM call, bypassing all agent logic.
+
+    Only model / api_key / model_server are read from the bot's database config
+    (resolved via bot_id). Messages are forwarded to the LLM as-is.
+
+    Required Parameters:
+    - bot_id: str - target bot id (used to look up LLM config from db)
+    - messages: List[Message] - conversation messages, passed through directly
+
+    Optional Parameters:
+    - stream: bool - whether to stream the output, default false
+    - user_identifier: str - used to resolve the api_key owner
+
+    Returns:
+        Union[dict, StreamingResponse]: OpenAI-compatible completion or stream
+
+    Raises:
+        HTTPException: 400 when bot_id or messages is missing, 500 on any
+            other error raised before the response is returned.
+    """
+    try:
+        bot_id = request.bot_id
+        if not bot_id:
+            raise HTTPException(status_code=400, detail="bot_id is required")
+        if not request.messages:
+            raise HTTPException(status_code=400, detail="messages is required")
+
+        # Optional auth check (consistent with v3, non-blocking)
+        if authorization:
+            expected_token = generate_v2_auth_token(bot_id)
+            provided_token = extract_api_key_from_auth(authorization)
+            if provided_token and provided_token != expected_token:
+                logger.warning("Invalid auth token provided for LLM passthrough API, but continuing anyway")
+
+        # Build the LLM client from db config
+        llm, model_name = await build_llm_from_bot_config(bot_id, request.user_identifier)
+
+        # Forward messages as-is (pure passthrough, no agent processing)
+        lc_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
+
+        chunk_id = f"chatcmpl-{int(time.time())}"
+
+        # Streaming response
+        if request.stream:
+            async def generate():
+                try:
+                    async for chunk in llm.astream(lc_messages):
+                        content = _llm_content_to_text(chunk.content)
+                        if content:
+                            data = create_stream_chunk(chunk_id, model_name, content=content)
+                            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+                    # Final chunk with finish_reason
+                    done = create_stream_chunk(chunk_id, model_name, finish_reason="stop")
+                    yield f"data: {json.dumps(done, ensure_ascii=False)}\n\n"
+                    yield "data: [DONE]\n\n"
+                except Exception as stream_error:
+                    # The response has already started by the time this runs,
+                    # so the failure is reported in-band as a data event.
+                    logger.error(f"Error in LLM passthrough stream: {stream_error}")
+                    err = {"error": {"message": str(stream_error), "type": "internal_error"}}
+                    yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"
+
+            return StreamingResponse(generate(), media_type="text/event-stream")
+
+        # Non-streaming response
+        response = await llm.ainvoke(lc_messages)
+        content = _llm_content_to_text(response.content)
+
+        return {
+            "id": chunk_id,
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": model_name,
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": content},
+                "finish_reason": "stop"
+            }]
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        error_details = traceback.format_exc()
+        logger.error(f"Error in llm_passthrough_v3: {str(e)}")
+        logger.error(f"Full traceback: {error_details}")
+        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
+
+
 # ============================================================================
 # Chat history query endpoints
 # ============================================================================