add /api/v3/llm/chat/completions

This commit is contained in:
朱潮 2026-06-07 10:55:25 +08:00
parent 8466b0e710
commit f18d966123

View File

@ -18,8 +18,10 @@ from utils.fastapi_utils import (
process_messages,
create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
call_preamble_llm,
create_stream_chunk
create_stream_chunk,
detect_provider, sanitize_model_kwargs
)
from langchain.chat_models import init_chat_model
from langchain_core.messages import AIMessageChunk, ToolMessage, AIMessage, HumanMessage
from utils.settings import MAX_OUTPUT_TOKENS
from agent.agent_config import AgentConfig
@ -968,6 +970,122 @@ async def chat_completions_v3(request: ChatRequestV3, authorization: Optional[st
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str] = None):
    """Create a bare LangChain chat model from the LLM settings stored for a bot.

    The bot's config is loaded with ``fetch_bot_config_from_db`` and only its
    ``model``, ``api_key`` and ``model_server`` entries are consulted. The
    resulting chat model is built with ``init_chat_model``; no agent logic is
    involved.

    Args:
        bot_id: Id of the bot whose config is looked up.
        user_identifier: Optional identifier forwarded to the config lookup.

    Returns:
        tuple: ``(llm_instance, model_name)``

    Raises:
        HTTPException: 400 when the bot's config has no (or an empty) model.
    """
    config = await fetch_bot_config_from_db(bot_id, user_identifier)

    # Pull the three LLM-related settings, each defaulting to an empty string.
    model_name, api_key, model_server = (
        config.get(field, "") for field in ("model", "api_key", "model_server")
    )

    if not model_name:
        raise HTTPException(status_code=400, detail=f"No model configured for bot '{bot_id}'")

    # Resolve the provider / base URL and sanitize the constructor kwargs
    # through the shared helpers; no generation overrides are supplied.
    provider, resolved_base_url = detect_provider(model_name, model_server)
    sanitized = sanitize_model_kwargs(
        model_name=model_name,
        model_provider=provider,
        base_url=resolved_base_url,
        api_key=api_key,
        generate_cfg={},
        source="llm_passthrough",
    )
    # Only the first element (the init kwargs) of the 3-tuple is needed here.
    init_kwargs, _, _ = sanitized

    return init_chat_model(**init_kwargs), model_name
def _passthrough_content_to_text(content) -> str:
    """Flatten a chat-message ``content`` value into plain text.

    Message content may be a plain string or a list of content blocks
    (strings and/or dicts). Calling ``str()`` on a list would leak a Python
    repr to the client, and an empty list would become the truthy string
    ``"[]"``; this helper extracts only the textual parts instead.

    Args:
        content: The ``content`` attribute of a message or message chunk.

    Returns:
        The concatenated text; an empty string when there is no text.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict):
                # Keep text blocks only; other block kinds (tool calls etc.)
                # carry no assistant text to forward.
                text = block.get("text")
                if block.get("type", "text") == "text" and isinstance(text, str):
                    parts.append(text)
        return "".join(parts)
    # Unknown content shape: fall back to its string form.
    return str(content)


@router.post("/api/v3/llm/chat/completions")
async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
    """LLM passthrough API - direct LLM call, bypassing all agent logic.

    The LLM client is built by ``build_llm_from_bot_config`` from the bot's
    stored config (resolved via ``bot_id``). Each message's role and content
    are forwarded to the LLM unchanged.

    Required Parameters:
        - bot_id: str - target bot id (used to look up the LLM config)
        - messages: List[Message] - conversation messages, passed through directly

    Optional Parameters:
        - stream: bool - whether to stream the output
        - user_identifier: str - forwarded to the bot config lookup

    Returns:
        Union[dict, StreamingResponse]: OpenAI-compatible completion dict, or a
        ``text/event-stream`` response of chunks terminated by ``data: [DONE]``.

    Raises:
        HTTPException: 400 when ``bot_id`` is missing; any HTTPException raised
            while building the LLM is re-raised as-is; every other failure is
            reported as a 500.
    """
    try:
        bot_id = request.bot_id
        if not bot_id:
            raise HTTPException(status_code=400, detail="bot_id is required")

        # Optional auth check (consistent with v3, non-blocking).
        # NOTE(review): a mismatching token is only logged, so this endpoint is
        # effectively unauthenticated - confirm that this is intended.
        if authorization:
            expected_token = generate_v2_auth_token(bot_id)
            provided_token = extract_api_key_from_auth(authorization)
            if provided_token and provided_token != expected_token:
                logger.warning("Invalid auth token provided for LLM passthrough API, but continuing anyway")

        # Build the LLM client from db config
        llm, model_name = await build_llm_from_bot_config(bot_id, request.user_identifier)

        # Forward messages as-is (pure passthrough, no agent processing)
        lc_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
        chunk_id = f"chatcmpl-{int(time.time())}"

        # Streaming response
        if request.stream:
            async def generate():
                try:
                    async for chunk in llm.astream(lc_messages):
                        # Flatten list-style content so neither "[]" nor a
                        # Python repr is ever streamed to the client.
                        content = _passthrough_content_to_text(chunk.content)
                        if content:
                            data = create_stream_chunk(chunk_id, model_name, content=content)
                            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                    # Final chunk with finish_reason
                    done = create_stream_chunk(chunk_id, model_name, finish_reason="stop")
                    yield f"data: {json.dumps(done, ensure_ascii=False)}\n\n"
                    yield "data: [DONE]\n\n"
                except Exception as stream_error:
                    # Headers are already sent once streaming starts, so the
                    # failure is reported in-band as an SSE error event.
                    logger.error(f"Error in LLM passthrough stream: {stream_error}")
                    err = {"error": {"message": str(stream_error), "type": "internal_error"}}
                    yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"

            return StreamingResponse(generate(), media_type="text/event-stream")

        # Non-streaming response
        response = await llm.ainvoke(lc_messages)
        content = _passthrough_content_to_text(response.content)
        return {
            "id": chunk_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model_name,
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop"
            }]
        }
    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400s above) unchanged.
        raise
    except Exception as e:
        error_details = traceback.format_exc()
        logger.error(f"Error in llm_passthrough_v3: {str(e)}")
        logger.error(f"Full traceback: {error_details}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
# ============================================================================
# Chat history query endpoints
# ============================================================================