From f18d96612399b77db0298a512b96c8aa8456edbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= <zhuchaowe@users.noreply.github.com>
Date: Sun, 7 Jun 2026 10:55:25 +0800
Subject: [PATCH] add /api/v3/llm/chat/completions

---
 routes/chat.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 119 insertions(+), 1 deletion(-)

diff --git a/routes/chat.py b/routes/chat.py
index 9f98e47..b6b29e4 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -18,8 +18,10 @@ from utils.fastapi_utils import (
     process_messages,
     create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
     call_preamble_llm,
-    create_stream_chunk
+    create_stream_chunk,
+    detect_provider, sanitize_model_kwargs
 )
+from langchain.chat_models import init_chat_model
 from langchain_core.messages import AIMessageChunk, ToolMessage, AIMessage, HumanMessage
 from utils.settings import MAX_OUTPUT_TOKENS
 from agent.agent_config import AgentConfig
@@ -968,6 +970,122 @@ async def chat_completions_v3(request: ChatRequestV3, authorization: Optional[st
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
 
+async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str] = None):
+    """Build a LangChain chat model from a bot's stored config, without agent logic.
+
+    Reads "model", "api_key" and "model_server" from fetch_bot_config_from_db();
+    raises HTTPException(400) when the model name is empty or missing.
+
+    Returns:
+        tuple: (llm_instance, model_name)
+    """
+    bot_config = await fetch_bot_config_from_db(bot_id, user_identifier)
+
+    model_name = bot_config.get("model", "")
+    api_key = bot_config.get("api_key", "")
+    model_server = bot_config.get("model_server", "")
+
+    if not model_name:
+        raise HTTPException(status_code=400, detail=f"No model configured for bot '{bot_id}'")
+
+    # Resolve provider/base_url, then build init_chat_model kwargs (empty generate_cfg)
+    model_provider, base_url = detect_provider(model_name, model_server)
+    model_kwargs, _, _ = sanitize_model_kwargs(
+        model_name=model_name,
+        model_provider=model_provider,
+        base_url=base_url,
+        api_key=api_key,
+        generate_cfg={},
+        source="llm_passthrough"
+    )
+
+    llm = init_chat_model(**model_kwargs)
+    return llm, model_name
+
+
+@router.post("/api/v3/llm/chat/completions")
+async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
+    """LLM passthrough API - direct LLM call, bypassing all agent logic.
+
+    Only model / api_key / model_server are read from the bot's database config
+    (resolved via bot_id). Only each message's role and content reach the LLM.
+
+    Required Parameters:
+        - bot_id: str - target bot id; a falsy value is rejected with HTTP 400
+        - messages: conversation messages; their role/content pairs are forwarded
+
+    Optional Parameters:
+        - stream: bool - when truthy, reply as an SSE stream (text/event-stream)
+        - user_identifier: str - passed on to the bot-config lookup
+        - Authorization header - checked against generate_v2_auth_token(bot_id), log-only
+
+    Returns: dict ("chat.completion") or StreamingResponse; unexpected errors -> HTTP 500
+    """
+    try:
+        bot_id = request.bot_id
+        if not bot_id:
+            raise HTTPException(status_code=400, detail="bot_id is required")
+
+        # Advisory auth check: a token mismatch is logged but never rejects the request
+        if authorization:
+            expected_token = generate_v2_auth_token(bot_id)
+            provided_token = extract_api_key_from_auth(authorization)
+            if provided_token and provided_token != expected_token:
+                logger.warning("Invalid auth token provided for LLM passthrough API, but continuing anyway")
+
+        # Build the LLM client from db config (HTTPException 400 if no model is set)
+        llm, model_name = await build_llm_from_bot_config(bot_id, request.user_identifier)
+
+        # Keep only role/content from each message; no agent processing is applied
+        lc_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
+
+        chunk_id = f"chatcmpl-{int(time.time())}"
+
+        # Streaming response: SSE events; [DONE] is sent only when the stream succeeds
+        if request.stream:
+            async def generate():
+                try:
+                    async for chunk in llm.astream(lc_messages):
+                        content = chunk.content if isinstance(chunk.content, str) else str(chunk.content)
+                        if content:
+                            data = create_stream_chunk(chunk_id, model_name, content=content)
+                            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+                    # Final chunk with finish_reason
+                    done = create_stream_chunk(chunk_id, model_name, finish_reason="stop")
+                    yield f"data: {json.dumps(done, ensure_ascii=False)}\n\n"
+                    yield "data: [DONE]\n\n"
+                except Exception as stream_error:
+                    logger.error(f"Error in LLM passthrough stream: {stream_error}")
+                    err = {"error": {"message": str(stream_error), "type": "internal_error"}}
+                    yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"
+
+            return StreamingResponse(generate(), media_type="text/event-stream")
+
+        # Non-streaming response; non-string content is rendered with str()
+        response = await llm.ainvoke(lc_messages)
+        content = response.content if isinstance(response.content, str) else str(response.content)
+
+        return {
+            "id": chunk_id,
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": model_name,
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": content},
+                "finish_reason": "stop"
+            }]
+        }
+    # HTTPException passes through unchanged; anything else is logged and mapped to a 500
+    except HTTPException:
+        raise
+    except Exception as e:
+        error_details = traceback.format_exc()
+        logger.error(f"Error in llm_passthrough_v3: {str(e)}")
+        logger.error(f"Full traceback: {error_details}")
+        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
+
+
 # ============================================================================
 # Chat history query endpoints
 # ============================================================================