add /api/v3/llm/chat/completions
This commit is contained in:
parent
8466b0e710
commit
f18d966123
120
routes/chat.py
120
routes/chat.py
@ -18,8 +18,10 @@ from utils.fastapi_utils import (
|
|||||||
process_messages,
|
process_messages,
|
||||||
create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
|
create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
|
||||||
call_preamble_llm,
|
call_preamble_llm,
|
||||||
create_stream_chunk
|
create_stream_chunk,
|
||||||
|
detect_provider, sanitize_model_kwargs
|
||||||
)
|
)
|
||||||
|
from langchain.chat_models import init_chat_model
|
||||||
from langchain_core.messages import AIMessageChunk, ToolMessage, AIMessage, HumanMessage
|
from langchain_core.messages import AIMessageChunk, ToolMessage, AIMessage, HumanMessage
|
||||||
from utils.settings import MAX_OUTPUT_TOKENS
|
from utils.settings import MAX_OUTPUT_TOKENS
|
||||||
from agent.agent_config import AgentConfig
|
from agent.agent_config import AgentConfig
|
||||||
@ -968,6 +970,122 @@ async def chat_completions_v3(request: ChatRequestV3, authorization: Optional[st
|
|||||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str] = None):
    """Create a bare LangChain chat model from a bot's stored configuration.

    The bot config is loaded with ``fetch_bot_config_from_db`` and only its
    ``model``, ``api_key`` and ``model_server`` entries are consulted. The
    provider/base URL are resolved with ``detect_provider`` and the final
    constructor kwargs come from ``sanitize_model_kwargs``; no agent is built.

    Args:
        bot_id: Id of the bot whose config is looked up.
        user_identifier: Optional identifier forwarded to the config lookup.

    Returns:
        tuple: (llm_instance, model_name)

    Raises:
        HTTPException: 400 when the bot config has no model set.
    """
    config = await fetch_bot_config_from_db(bot_id, user_identifier)

    model_name = config.get("model", "")
    llm_api_key = config.get("api_key", "")
    server = config.get("model_server", "")

    # Guard clause: nothing can be built without a model name.
    if not model_name:
        raise HTTPException(status_code=400, detail=f"No model configured for bot '{bot_id}'")

    # Resolve provider + base URL, then let the shared sanitizer produce the
    # kwargs for init_chat_model. Only the first element of the sanitizer's
    # 3-tuple is needed here.
    provider, resolved_base_url = detect_provider(model_name, server)
    init_kwargs, _unused_first, _unused_second = sanitize_model_kwargs(
        model_name=model_name,
        model_provider=provider,
        base_url=resolved_base_url,
        api_key=llm_api_key,
        generate_cfg={},
        source="llm_passthrough",
    )

    return init_chat_model(**init_kwargs), model_name
|
||||||
|
|
||||||
|
|
||||||
|
def _flatten_llm_content(content: object) -> str:
    """Collapse LangChain message content into plain text.

    Message content may be a plain string or a list whose items are strings
    or content-block dicts. Calling ``str()`` on such a list would leak a
    Python repr (e.g. ``"[]"`` or ``"[{'type': 'text', ...}]"``) into the
    response, so only the textual parts are joined here.

    Args:
        content: The ``content`` attribute of a LangChain message or chunk.

    Returns:
        The concatenated text; an empty string when there is no text.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and block.get("type", "text") == "text":
                text = block.get("text")
                if isinstance(text, str):
                    parts.append(text)
            # Non-text blocks (tool calls, reasoning, ...) carry no assistant
            # text for a chat-completion payload and are skipped.
        return "".join(parts)
    return str(content)


@router.post("/api/v3/llm/chat/completions")
async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
    """LLM passthrough API - direct LLM call, bypassing all agent logic.

    Only model / api_key / model_server are read from the bot's database config
    (resolved via bot_id). Messages are forwarded to the LLM as role/content
    pairs without further processing.

    Required Parameters:
        - bot_id: str - target bot id (used to look up LLM config from db)
        - messages: List[Message] - conversation messages, passed through directly

    Optional Parameters:
        - stream: bool - whether to stream the output
        - user_identifier: str - forwarded to the bot config lookup

    Returns:
        Union[dict, StreamingResponse]: OpenAI-style completion dict, or a
        ``text/event-stream`` response when ``request.stream`` is truthy.

    Raises:
        HTTPException: 400 when ``bot_id`` is missing (or raised by the config
        loader), 500 for any other failure before the response is produced.
    """
    try:
        bot_id = request.bot_id
        if not bot_id:
            raise HTTPException(status_code=400, detail="bot_id is required")

        # Optional auth check: a mismatching token is only logged, the
        # request is still served.
        if authorization:
            expected_token = generate_v2_auth_token(bot_id)
            provided_token = extract_api_key_from_auth(authorization)
            if provided_token and provided_token != expected_token:
                logger.warning("Invalid auth token provided for LLM passthrough API, but continuing anyway")

        # Build the LLM client from db config
        llm, model_name = await build_llm_from_bot_config(bot_id, request.user_identifier)

        # Forward messages as-is (pure passthrough, no agent processing)
        lc_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]

        chunk_id = f"chatcmpl-{int(time.time())}"

        # Streaming response
        if request.stream:
            async def generate():
                try:
                    async for chunk in llm.astream(lc_messages):
                        # Flatten list-style content so clients never receive
                        # a Python repr; empty chunks are dropped.
                        content = _flatten_llm_content(chunk.content)
                        if content:
                            data = create_stream_chunk(chunk_id, model_name, content=content)
                            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                    # Final chunk with finish_reason
                    done = create_stream_chunk(chunk_id, model_name, finish_reason="stop")
                    yield f"data: {json.dumps(done, ensure_ascii=False)}\n\n"
                    yield "data: [DONE]\n\n"
                except Exception as stream_error:
                    # The response has already started, so the error is
                    # reported in-band as an SSE event instead of being raised.
                    logger.error(f"Error in LLM passthrough stream: {stream_error}")
                    err = {"error": {"message": str(stream_error), "type": "internal_error"}}
                    yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"

            return StreamingResponse(generate(), media_type="text/event-stream")

        # Non-streaming response
        response = await llm.ainvoke(lc_messages)
        content = _flatten_llm_content(response.content)

        return {
            "id": chunk_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model_name,
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop"
            }]
        }

    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400s above) unchanged.
        raise
    except Exception as e:
        error_details = traceback.format_exc()
        logger.error(f"Error in llm_passthrough_v3: {str(e)}")
        logger.error(f"Full traceback: {error_details}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Chat history query endpoints
|
# Chat history query endpoints
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user