add /api/v3/llm/chat/completions

2026-06-07 10:55:25 +08:00 · 2026-06-07 10:55:25 +08:00 · f18d966123
commit f18d966123
parent 8466b0e710
1 changed files with 119 additions and 1 deletions
--- a/routes/chat.py
+++ b/routes/chat.py
@ -18,8 +18,10 @@ from utils.fastapi_utils import (
    process_messages,
    create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
    call_preamble_llm,
-    create_stream_chunk
+    create_stream_chunk,
    detect_provider, sanitize_model_kwargs
 )
 from langchain.chat_models import init_chat_model
 from langchain_core.messages import AIMessageChunk, ToolMessage, AIMessage, HumanMessage
 from utils.settings import MAX_OUTPUT_TOKENS
 from agent.agent_config import AgentConfig
@ -968,6 +970,122 @@ async def chat_completions_v3(request: ChatRequestV3, authorization: Optional[st
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str] = None):
     """Create a plain LangChain chat model from a bot's stored configuration.

     The bot config is loaded with ``fetch_bot_config_from_db`` and only its
     ``model``, ``api_key`` and ``model_server`` entries are read. No agent
     logic is involved: the result is whatever ``init_chat_model`` builds
     from the sanitized keyword arguments.

     Args:
         bot_id: Identifier of the bot whose config is looked up.
         user_identifier: Optional value forwarded unchanged to the config
             lookup.

     Returns:
         tuple: ``(llm_instance, model_name)``

     Raises:
         HTTPException: 400 when the bot config has no (or an empty) model.
     """
     config = await fetch_bot_config_from_db(bot_id, user_identifier)
     # Missing entries fall back to "" so the emptiness check below is uniform.
     model_name, api_key, model_server = (
         config.get(field, "") for field in ("model", "api_key", "model_server")
     )
     if not model_name:
         raise HTTPException(status_code=400, detail=f"No model configured for bot '{bot_id}'")

     # Provider detection and kwarg sanitizing go through the shared helpers
     # (the original comment notes this matches the agent path).
     provider, endpoint = detect_provider(model_name, model_server)
     sanitized = sanitize_model_kwargs(
         model_name=model_name,
         model_provider=provider,
         base_url=endpoint,
         api_key=api_key,
         generate_cfg={},
         source="llm_passthrough",
     )
     # Only the first element (the init kwargs) is used; the other two are ignored.
     init_kwargs, _ignored_a, _ignored_b = sanitized
     return init_chat_model(**init_kwargs), model_name
def _flatten_llm_content(content):
    """Reduce LangChain message content to the plain text it carries.

    Message content may be a plain string or a list of content blocks
    (bare strings, or dicts such as ``{"type": "text", "text": "..."}``).
    Calling ``str()`` on such a list would leak a Python repr to the client
    and turn an empty list into the non-empty string ``"[]"``.

    Args:
        content: The ``content`` attribute of a message or message chunk.

    Returns:
        str: The concatenated text. Non-text blocks are skipped; ``None``
        yields ``""``; any other unexpected type falls back to ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                text = block.get("text")
                if isinstance(text, str):
                    pieces.append(text)
        return "".join(pieces)
    return str(content)


@router.post("/api/v3/llm/chat/completions")
async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
    """LLM passthrough API - direct LLM call, bypassing all agent logic.

    Only model / api_key / model_server are read from the bot's database config
    (resolved via bot_id). Each message is forwarded to the LLM as a
    ``{"role", "content"}`` dict without further processing.

    Required Parameters:
        - bot_id: str - target bot id (used to look up LLM config from db)
        - messages: List[Message] - conversation messages, passed through directly

    Optional Parameters:
        - stream: bool - whether to stream the output
        - user_identifier: str - forwarded to the bot config lookup

    Returns:
        Union[dict, StreamingResponse]: OpenAI-style ``chat.completion`` dict,
        or a ``text/event-stream`` response of chunks terminated by
        ``data: [DONE]``.

    Raises:
        HTTPException: 400 when ``bot_id`` is missing (HTTPExceptions raised
            while building the LLM are re-raised unchanged); 500 for any
            other failure before the response is returned.
    """
    try:
        bot_id = request.bot_id
        if not bot_id:
            raise HTTPException(status_code=400, detail="bot_id is required")

        # Optional auth check (consistent with v3, non-blocking): a mismatched
        # token is only logged, the request is still served.
        if authorization:
            expected_token = generate_v2_auth_token(bot_id)
            provided_token = extract_api_key_from_auth(authorization)
            if provided_token and provided_token != expected_token:
                logger.warning("Invalid auth token provided for LLM passthrough API, but continuing anyway")

        # Build the LLM client from db config
        llm, model_name = await build_llm_from_bot_config(bot_id, request.user_identifier)

        # Forward messages as-is (pure passthrough, no agent processing)
        lc_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
        chunk_id = f"chatcmpl-{int(time.time())}"

        # Streaming response
        if request.stream:
            async def generate():
                try:
                    async for chunk in llm.astream(lc_messages):
                        # Flatten list-style content blocks to text instead of
                        # emitting their repr; empty chunks are skipped.
                        content = _flatten_llm_content(chunk.content)
                        if content:
                            data = create_stream_chunk(chunk_id, model_name, content=content)
                            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                    # Final chunk with finish_reason
                    done = create_stream_chunk(chunk_id, model_name, finish_reason="stop")
                    yield f"data: {json.dumps(done, ensure_ascii=False)}\n\n"
                    yield "data: [DONE]\n\n"
                except Exception as stream_error:
                    # The response has already started, so the error is
                    # reported in-band as an SSE event rather than raised.
                    logger.error(f"Error in LLM passthrough stream: {stream_error}")
                    err = {"error": {"message": str(stream_error), "type": "internal_error"}}
                    yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"

            return StreamingResponse(generate(), media_type="text/event-stream")

        # Non-streaming response
        response = await llm.ainvoke(lc_messages)
        content = _flatten_llm_content(response.content)
        return {
            "id": chunk_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model_name,
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop"
            }]
        }
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400s above) pass through untouched.
        raise
    except Exception as e:
        error_details = traceback.format_exc()
        logger.error(f"Error in llm_passthrough_v3: {str(e)}")
        logger.error(f"Full traceback: {error_details}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 # ============================================================================
 # Chat history query endpoints
 # ============================================================================