1245 lines
54 KiB
Python
1245 lines
54 KiB
Python
import json
|
||
import os
|
||
import asyncio
|
||
import shutil
|
||
import time
|
||
import traceback
|
||
from typing import Union, Optional, Any, List, Dict
|
||
from fastapi import APIRouter, HTTPException, Header, Body
|
||
from fastapi.responses import StreamingResponse
|
||
import logging
|
||
|
||
logger = logging.getLogger('app')
|
||
from utils import (
|
||
Message, ChatRequest, ChatResponse, BatchSaveChatRequest, BatchSaveChatResponse
|
||
)
|
||
from utils.api_models import ChatRequestV2, ChatRequestV3, LLMPassthroughRequest
|
||
from utils.fastapi_utils import (
|
||
process_messages,
|
||
create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
|
||
call_preamble_llm,
|
||
create_stream_chunk,
|
||
detect_provider, sanitize_model_kwargs,
|
||
extract_text_from_content
|
||
)
|
||
from langchain.chat_models import init_chat_model
|
||
from langchain_core.messages import AIMessageChunk, ToolMessage, AIMessage, HumanMessage
|
||
from utils.settings import MAX_OUTPUT_TOKENS
|
||
from agent.agent_config import AgentConfig
|
||
from agent.deep_assistant import init_agent
|
||
from utils.daytona_sync import sync_sandbox_to_local
|
||
from utils.settings import DAYTONA_ENABLED
|
||
from utils.structured_log import emit_question_metric
|
||
|
||
router = APIRouter()
|
||
|
||
|
||
async def enhanced_generate_stream_response(
|
||
config: AgentConfig
|
||
):
|
||
"""Enhanced progressive streaming response generator - concurrency-optimized version
|
||
|
||
Args:
|
||
agent: LangChain agent object
|
||
config: AgentConfig object containing all parameters
|
||
"""
|
||
# Collect the full response content for saving to the database
|
||
full_response_content = []
|
||
|
||
# Cancellation management
|
||
cancel_event = None
|
||
request_started_at = config.request_started_at or time.monotonic()
|
||
|
||
try:
|
||
# Create output queue and control events
|
||
output_queue = asyncio.Queue()
|
||
preamble_completed = asyncio.Event()
|
||
|
||
# Register cancellation event
|
||
if config.session_id:
|
||
from utils.cancel_manager import register_cancel_event, unregister_cancel_event
|
||
cancel_event = register_cancel_event(config.session_id)
|
||
|
||
# Save user message before streaming starts
|
||
if config.session_id:
|
||
asyncio.create_task(_save_user_messages(config))
|
||
|
||
# Preamble task
|
||
async def preamble_task():
|
||
try:
|
||
preamble_result = await call_preamble_llm(config)
|
||
# Only output when preamble_text is non-empty and not "<empty>"
|
||
if preamble_result and preamble_result.strip() and preamble_result != "<empty>":
|
||
preamble_content = f"[PREAMBLE]\n{preamble_result}\n"
|
||
chunk_data = create_stream_chunk(f"chatcmpl-preamble", config.model_name, preamble_content)
|
||
await output_queue.put(("preamble", f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n"))
|
||
logger.info(f"Stream mode: Generated preamble text ({len(preamble_result)} chars)")
|
||
else:
|
||
logger.info("Stream mode: Skipped empty preamble text")
|
||
|
||
# Mark preamble as completed
|
||
preamble_completed.set()
|
||
await output_queue.put(("preamble_done", None))
|
||
|
||
except Exception as e:
|
||
logger.error(f"Error generating preamble text: {e}")
|
||
# Mark completion even on error to avoid blocking
|
||
preamble_completed.set()
|
||
await output_queue.put(("preamble_done", None))
|
||
|
||
# Agent task (setup + streaming)
|
||
async def agent_task():
|
||
checkpointer = None
|
||
try:
|
||
# Start streaming
|
||
logger.info(f"Starting agent stream response")
|
||
chunk_id = 0
|
||
message_tag = ""
|
||
current_tool_name = ""
|
||
last_answer_first_char_duration_ms = None
|
||
waiting_for_answer_first_char = False
|
||
agent, checkpointer, sandbox = await init_agent(config)
|
||
async for msg, metadata in agent.astream({"messages": config.messages}, stream_mode="messages", config=config.invoke_config(), max_tokens=MAX_OUTPUT_TOKENS):
|
||
# Check whether a cancellation signal was received
|
||
if cancel_event and cancel_event.is_set():
|
||
logger.info(f"Agent stream cancelled for session_id={config.session_id}")
|
||
break
|
||
|
||
new_content = ""
|
||
|
||
if isinstance(msg, AIMessageChunk):
|
||
# Handle tool calls
|
||
if msg.tool_call_chunks:
|
||
message_tag = "TOOL_CALL"
|
||
waiting_for_answer_first_char = False
|
||
for tool_call_chunk in msg.tool_call_chunks:
|
||
chunk_name = tool_call_chunk.get("name") if isinstance(tool_call_chunk, dict) else getattr(tool_call_chunk, "name", None)
|
||
chunk_args = tool_call_chunk.get("args") if isinstance(tool_call_chunk, dict) else getattr(tool_call_chunk, "args", None)
|
||
if chunk_name:
|
||
current_tool_name = chunk_name
|
||
if config.tool_response:
|
||
if chunk_name:
|
||
new_content = f"[{message_tag}] {chunk_name}\n"
|
||
if chunk_args:
|
||
new_content += chunk_args
|
||
# Handle text content
|
||
elif msg.content:
|
||
preamble_completed.set()
|
||
await output_queue.put(("preamble_done", None))
|
||
meta_message_tag = metadata.get("message_tag", "ANSWER")
|
||
# Do not output content for SUMMARY
|
||
if meta_message_tag == "SUMMARY":
|
||
continue
|
||
if meta_message_tag != message_tag:
|
||
message_tag = meta_message_tag
|
||
waiting_for_answer_first_char = meta_message_tag == "ANSWER"
|
||
new_content = f"[{meta_message_tag}]\n"
|
||
if msg.text:
|
||
if meta_message_tag == "ANSWER" and waiting_for_answer_first_char and msg.text.strip():
|
||
last_answer_first_char_duration_ms = max(
|
||
int((time.monotonic() - request_started_at) * 1000),
|
||
0,
|
||
)
|
||
waiting_for_answer_first_char = False
|
||
new_content += msg.text
|
||
# Handle tool responses
|
||
elif isinstance(msg, ToolMessage) and msg.content:
|
||
message_tag = "TOOL_RESPONSE"
|
||
waiting_for_answer_first_char = False
|
||
# Always output MCP App responses even when tool_response is disabled
|
||
is_ui_resource = (
|
||
msg.text
|
||
and msg.text.lstrip().startswith('{"')
|
||
and '"type":"app"' in msg.text
|
||
)
|
||
if config.tool_response or is_ui_resource:
|
||
new_content = f"[{message_tag}] {msg.name}\n{msg.text}\n"
|
||
|
||
# Collect full content
|
||
if new_content:
|
||
full_response_content.append(new_content)
|
||
|
||
# Send content chunk
|
||
if chunk_id == 0:
|
||
logger.info("Agent first token generated, starting stream output")
|
||
chunk_id += 1
|
||
chunk_data = create_stream_chunk(f"chatcmpl-{chunk_id}", config.model_name, new_content)
|
||
await output_queue.put(("agent", f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n"))
|
||
|
||
# Send final chunk
|
||
finish = "cancelled" if (cancel_event and cancel_event.is_set()) else "stop"
|
||
if last_answer_first_char_duration_ms is not None:
|
||
emit_question_metric(
|
||
stage="catalog_agent.final_answer_first_char",
|
||
status="cancel" if finish == "cancelled" else "success",
|
||
duration_ms=last_answer_first_char_duration_ms,
|
||
first_response_time_ms=last_answer_first_char_duration_ms,
|
||
trace_id=config.trace_id,
|
||
ai_id=config.bot_id,
|
||
session_id=config.session_id,
|
||
robot_type="agent",
|
||
model=config.model_name,
|
||
stream=config.stream,
|
||
extra={
|
||
"bot_id": config.bot_id,
|
||
"tool_response": config.tool_response,
|
||
"enable_thinking": config.enable_thinking,
|
||
"response_mode": "final_answer_first_char",
|
||
},
|
||
)
|
||
final_chunk = create_stream_chunk(f"chatcmpl-{chunk_id + 1}", config.model_name, finish_reason=finish)
|
||
await output_queue.put(("agent", f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n"))
|
||
# ============ Execute PostAgent hooks ============
|
||
# Note: runs in a separate async task here and does not block streaming
|
||
full_response = "".join(full_response_content)
|
||
asyncio.create_task(_execute_post_agent_hooks(config, full_response, sandbox))
|
||
# ===========================================
|
||
|
||
await output_queue.put(("agent_done", None))
|
||
|
||
except Exception as e:
|
||
logger.error(f"Error in agent task: {e}\n{traceback.format_exc()}")
|
||
# Send error information to the client
|
||
await output_queue.put(
|
||
("agent", f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n")
|
||
)
|
||
# Send completion signal to ensure the output controller exits normally
|
||
await output_queue.put(("agent_done", None))
|
||
|
||
# Execute tasks concurrently
|
||
# Only run the preamble task when enable_thinking is True
|
||
if config.enable_thinking:
|
||
preamble_task_handle = asyncio.create_task(preamble_task())
|
||
else:
|
||
# If thinking is disabled, create an empty completed task
|
||
preamble_task_handle = asyncio.create_task(asyncio.sleep(0))
|
||
# Mark preamble as completed directly
|
||
preamble_completed.set()
|
||
|
||
agent_task_handle = asyncio.create_task(agent_task())
|
||
|
||
# Output controller: ensure preamble is emitted before the agent stream
|
||
preamble_output_done = False
|
||
last_yield_time = time.time()
|
||
|
||
while True:
|
||
try:
|
||
# Set a timeout to avoid waiting forever
|
||
item_type, item_data = await asyncio.wait_for(output_queue.get(), timeout=1.0)
|
||
|
||
if item_type == "preamble":
|
||
# Emit preamble content immediately
|
||
if item_data:
|
||
yield item_data
|
||
last_yield_time = time.time()
|
||
preamble_output_done = True
|
||
|
||
elif item_type == "preamble_done":
|
||
# Preamble completed, mark it and continue
|
||
preamble_output_done = True
|
||
|
||
elif item_type == "agent":
|
||
# Agent stream content must wait until preamble output is finished
|
||
if preamble_output_done:
|
||
yield item_data
|
||
last_yield_time = time.time()
|
||
else:
|
||
# Preamble has not been emitted yet, put it back into the queue first
|
||
await output_queue.put((item_type, item_data))
|
||
# Wait for preamble completion
|
||
await preamble_completed.wait()
|
||
preamble_output_done = True
|
||
|
||
elif item_type == "agent_done":
|
||
# Agent stream finished, end the loop
|
||
break
|
||
|
||
except asyncio.TimeoutError:
|
||
# Check whether a cancellation signal was received
|
||
if cancel_event and cancel_event.is_set():
|
||
logger.info(f"Output loop cancelled for session_id={config.session_id}")
|
||
break
|
||
|
||
# Check whether any tasks are still running
|
||
if all(task.done() for task in [preamble_task_handle, agent_task_handle]):
|
||
# All tasks are done, exit the loop
|
||
break
|
||
# Only send a heartbeat if no messages have been output for 15 seconds to keep the connection alive
|
||
if time.time() - last_yield_time >= 15:
|
||
heartbeat_chunk = create_stream_chunk(f"chatcmpl-heartbeat", config.model_name, "")
|
||
yield f"data: {json.dumps(heartbeat_chunk, ensure_ascii=False)}\n\n"
|
||
last_yield_time = time.time()
|
||
continue
|
||
|
||
# Send end marker
|
||
yield "data: [DONE]\n\n"
|
||
# Clean up cancellation event
|
||
if config.session_id:
|
||
from utils.cancel_manager import unregister_cancel_event
|
||
unregister_cancel_event(config.session_id)
|
||
logger.info(f"Enhanced stream response completed")
|
||
|
||
|
||
# Save AI response after streaming ends
|
||
if full_response_content and config.session_id:
|
||
asyncio.create_task(_save_assistant_response(config, "".join(full_response_content)))
|
||
|
||
except Exception as e:
|
||
logger.error(f"Error in enhanced_generate_stream_response: {e}")
|
||
yield f'data: {{"error": "{str(e)}"}}\n\n'
|
||
yield "data: [DONE]\n\n"
|
||
# Clean up cancellation event
|
||
if config.session_id:
|
||
from utils.cancel_manager import unregister_cancel_event
|
||
unregister_cancel_event(config.session_id)
|
||
|
||
|
||
async def create_agent_and_generate_response(
|
||
config: AgentConfig
|
||
) -> Union[ChatResponse, StreamingResponse]:
|
||
"""Shared logic for creating an agent and generating a response
|
||
|
||
Args:
|
||
config: AgentConfig object containing all parameters
|
||
"""
|
||
# Use the enhanced streaming response generator for streaming mode
|
||
if config.stream:
|
||
return StreamingResponse(
|
||
enhanced_generate_stream_response(config),
|
||
media_type="text/event-stream",
|
||
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
|
||
)
|
||
|
||
agent, checkpointer, sandbox = await init_agent(config)
|
||
# Use the updated messages
|
||
agent_responses = await agent.ainvoke({"messages": config.messages}, config=config.invoke_config(), max_tokens=MAX_OUTPUT_TOKENS)
|
||
|
||
# ============ Execute PostAgent hooks ============
|
||
# Note: runs in a separate async task here and does not block non-streaming responses
|
||
asyncio.create_task(_execute_post_agent_hooks(config, "", sandbox))
|
||
# ===========================================
|
||
|
||
# Scan backward for the first HumanMessage, then send everything after it to append_messages
|
||
all_messages = agent_responses["messages"]
|
||
first_human_idx = None
|
||
for i in range(len(all_messages) - 1, -1, -1):
|
||
if isinstance(all_messages[i], HumanMessage):
|
||
first_human_idx = i
|
||
break
|
||
|
||
if first_human_idx is not None:
|
||
append_messages = all_messages[first_human_idx + 1:]
|
||
else:
|
||
# If no HumanMessage is found, use all messages
|
||
append_messages = all_messages
|
||
response_text = ""
|
||
for msg in append_messages:
|
||
if isinstance(msg,AIMessage):
|
||
if len(msg.text)>0:
|
||
meta_message_tag = msg.additional_kwargs.get("message_tag", "ANSWER")
|
||
if meta_message_tag == "SUMMARY":
|
||
continue
|
||
output_text = msg.text.replace("````","").replace("````","") if meta_message_tag == "THINK" else msg.text
|
||
response_text += f"[{meta_message_tag}]\n"+output_text+ "\n"
|
||
if len(msg.tool_calls)>0 and config.tool_response:
|
||
response_text += "".join([f"[TOOL_CALL] {tool['name']}\n{json.dumps(tool["args"]) if isinstance(tool["args"],dict) else tool["args"]}\n" for tool in msg.tool_calls])
|
||
elif isinstance(msg,ToolMessage) and config.tool_response:
|
||
response_text += f"[TOOL_RESPONSE] {msg.name}\n{msg.text}\n"
|
||
|
||
if len(response_text) > 0:
|
||
# Build the OpenAI-format response
|
||
result = ChatResponse(
|
||
choices=[{
|
||
"index": 0,
|
||
"message": {
|
||
"role": "assistant",
|
||
"content": response_text
|
||
},
|
||
"finish_reason": "stop"
|
||
}],
|
||
usage={
|
||
"prompt_tokens": sum(len(extract_text_from_content(msg.get("content", ""))) for msg in config.messages),
|
||
"completion_tokens": len(response_text),
|
||
"total_tokens": sum(len(extract_text_from_content(msg.get("content", ""))) for msg in config.messages) + len(response_text)
|
||
}
|
||
)
|
||
|
||
# Save chat history to the database (same logic as the streaming endpoint)
|
||
await _save_user_messages(config)
|
||
await _save_assistant_response(config, response_text)
|
||
else:
|
||
raise HTTPException(status_code=500, detail="No response from agent")
|
||
|
||
return result
|
||
|
||
async def _save_user_messages(config: AgentConfig) -> None:
|
||
"""
|
||
Save the last user message (for both streaming and non-streaming endpoints).
|
||
|
||
Args:
|
||
config: AgentConfig object
|
||
"""
|
||
# Save only when session_id exists
|
||
if not config.session_id:
|
||
return
|
||
|
||
try:
|
||
from agent.chat_history_manager import get_chat_history_manager
|
||
from agent.plugin_hook_loader import execute_hooks
|
||
|
||
manager = get_chat_history_manager()
|
||
|
||
# Save only the last user message
|
||
for msg in reversed(config.messages):
|
||
if isinstance(msg, dict):
|
||
role = msg.get("role", "")
|
||
content = msg.get("content", "")
|
||
# Flatten multimodal list content to plain text before persisting,
|
||
# so base64 image data is not stored in chat history.
|
||
content = extract_text_from_content(content)
|
||
if role == "user" and content:
|
||
# ============ Execute PreSave hooks ============
|
||
processed_content = await execute_hooks('PreSave', config, content=content, role=role)
|
||
if processed_content:
|
||
content = processed_content
|
||
# ================================================
|
||
|
||
await manager.manager.save_message(
|
||
session_id=config.session_id,
|
||
role=role,
|
||
content=content,
|
||
bot_id=config.bot_id,
|
||
user_identifier=config.user_identifier
|
||
)
|
||
break # Save only the last one, then exit
|
||
|
||
logger.debug(f"Saved last user message for session_id={config.session_id}")
|
||
except Exception as e:
|
||
# Save failure should not affect the main flow
|
||
logger.error(f"Failed to save user messages: {e}")
|
||
|
||
|
||
async def _save_assistant_response(config: AgentConfig, assistant_response: str) -> None:
|
||
"""
|
||
Save the AI assistant response (for both streaming and non-streaming endpoints).
|
||
|
||
Args:
|
||
config: AgentConfig object
|
||
assistant_response: AI assistant response content
|
||
"""
|
||
# Save only when session_id exists
|
||
if not config.session_id:
|
||
return
|
||
|
||
if not assistant_response:
|
||
return
|
||
|
||
try:
|
||
from agent.chat_history_manager import get_chat_history_manager
|
||
from agent.plugin_hook_loader import execute_hooks
|
||
|
||
manager = get_chat_history_manager()
|
||
|
||
# ============ Execute PreSave hooks ============
|
||
processed_response = await execute_hooks('PreSave', config, content=assistant_response, role='assistant')
|
||
if processed_response:
|
||
assistant_response = processed_response
|
||
# ================================================
|
||
|
||
# Save the AI assistant response
|
||
await manager.manager.save_message(
|
||
session_id=config.session_id,
|
||
role="assistant",
|
||
content=assistant_response,
|
||
bot_id=config.bot_id,
|
||
user_identifier=config.user_identifier
|
||
)
|
||
|
||
logger.debug(f"Saved assistant response for session_id={config.session_id}")
|
||
except Exception as e:
|
||
# Save failure should not affect the main flow
|
||
logger.error(f"Failed to save assistant response: {e}")
|
||
|
||
|
||
async def _execute_post_agent_hooks(config: AgentConfig, response: str, sandbox=None) -> None:
|
||
"""
|
||
Execute PostAgent hooks (after agent execution).
|
||
|
||
Args:
|
||
config: AgentConfig object
|
||
response: Full response content from the agent
|
||
sandbox: Optional DaytonaSandbox instance used for reverse file sync
|
||
"""
|
||
try:
|
||
from agent.plugin_hook_loader import execute_hooks
|
||
|
||
metadata = {
|
||
"bot_id": config.bot_id,
|
||
"user_identifier": config.user_identifier,
|
||
"session_id": config.session_id,
|
||
"language": config.language,
|
||
}
|
||
|
||
await execute_hooks('PostAgent', config, response=response, metadata=metadata)
|
||
logger.debug(f"Executed PostAgent hooks for session_id={config.session_id}")
|
||
except Exception as e:
|
||
# Hook execution failure should not affect the main flow
|
||
logger.error(f"Failed to execute PostAgent hooks: {e}")
|
||
|
||
# Clean up the executable_code/tmp folder
|
||
await _cleanup_tmp_folder(config)
|
||
|
||
# Daytona: reverse-sync sandbox files to local
|
||
if sandbox is not None and DAYTONA_ENABLED:
|
||
try:
|
||
from pathlib import Path
|
||
local_workspace = str(Path.cwd() / "projects" / "robot" / config.bot_id)
|
||
sync_sandbox_to_local(sandbox, local_workspace)
|
||
except Exception as e:
|
||
logger.error(f"Failed to sync sandbox to local: {e}")
|
||
|
||
|
||
async def _cleanup_tmp_folder(config: AgentConfig) -> None:
|
||
"""
|
||
Clean files older than 3 days from the executable_code/tmp folder.
|
||
|
||
Args:
|
||
config: AgentConfig object
|
||
"""
|
||
try:
|
||
if config.project_dir and config.bot_id:
|
||
tmp_dir = os.path.join(config.project_dir, "executable_code", "tmp")
|
||
if os.path.exists(tmp_dir):
|
||
# Seconds for 3 days ago (3 * 24 * 60 * 60 = 259200)
|
||
three_days_ago = time.time() - (3 * 24 * 60 * 60)
|
||
|
||
deleted_count = 0
|
||
for item in os.listdir(tmp_dir):
|
||
item_path = os.path.join(tmp_dir, item)
|
||
# Check modification time
|
||
if os.path.getmtime(item_path) < three_days_ago:
|
||
if os.path.isfile(item_path) or os.path.islink(item_path):
|
||
os.remove(item_path)
|
||
else:
|
||
shutil.rmtree(item_path)
|
||
deleted_count += 1
|
||
logger.debug(f"Deleted old item: {item_path}")
|
||
|
||
logger.info(f"Cleaned up {deleted_count} old item(s) from tmp folder: {tmp_dir}")
|
||
except Exception as e:
|
||
# Cleanup failure should not affect the main flow
|
||
logger.error(f"Failed to cleanup tmp folder: {e}")
|
||
|
||
|
||
@router.post("/api/v1/chat/completions")
|
||
async def chat_completions(request: ChatRequest, authorization: Optional[str] = Header(None)):
|
||
"""
|
||
Chat completions API similar to OpenAI, supports both streaming and non-streaming
|
||
|
||
Args:
|
||
request: ChatRequest containing messages, model, optional dataset_ids list, required bot_id, system_prompt, mcp_settings, and files
|
||
authorization: Authorization header containing API key (Bearer <API_KEY>)
|
||
|
||
Returns:
|
||
Union[ChatResponse, StreamingResponse]: Chat completion response or stream
|
||
|
||
Notes:
|
||
- dataset_ids: optional parameter; when provided it must be a project ID list (use array format even for a single project)
|
||
- bot_id: required parameter, robot ID
|
||
- no directories are created when dataset_ids is an empty array [], None, or omitted
|
||
- supports multi-knowledge-base merging and automatically resolves duplicate folder names
|
||
|
||
Required Parameters:
|
||
- bot_id: str - target robot ID
|
||
- messages: List[Message] - conversation message list
|
||
Optional Parameters:
|
||
- dataset_ids: List[str] - source knowledge-base project ID list (use array format even for a single project)
|
||
|
||
Example:
|
||
{"bot_id": "my-bot-001", "messages": [{"role": "user", "content": "Hello"}]}
|
||
{"dataset_ids": ["project-123"], "bot_id": "my-bot-001", "messages": [{"role": "user", "content": "Hello"}]}
|
||
{"dataset_ids": ["project-123", "project-456"], "bot_id": "my-bot-002", "messages": [{"role": "user", "content": "Hello"}]}
|
||
{"dataset_ids": ["project-123"], "bot_id": "my-catalog-bot", "messages": [{"role": "user", "content": "Hello"}]}
|
||
"""
|
||
request_started_at = time.monotonic()
|
||
try:
|
||
# v1 endpoint: extract the API key from the Authorization header as the model API key
|
||
api_key = extract_api_key_from_auth(authorization)
|
||
|
||
# Get bot_id (required parameter)
|
||
bot_id = request.bot_id
|
||
if not bot_id:
|
||
raise HTTPException(status_code=400, detail="bot_id is required")
|
||
|
||
# Create project directory (if dataset_ids exist and the type is not agent)
|
||
project_dir = create_project_directory(request.dataset_ids, bot_id, request.skills)
|
||
|
||
# Collect extra parameters as generate_cfg
|
||
exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'shell_env', 'max_tokens'}
|
||
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
|
||
logger.info("chat_completions generate_cfg_keys=%s model=%s", list(generate_cfg.keys()), request.model)
|
||
# Process messages
|
||
messages = process_messages(request.messages, request.language)
|
||
# Create AgentConfig object
|
||
config = await AgentConfig.from_v1_request(request, api_key, project_dir, generate_cfg, messages)
|
||
config.request_started_at = request_started_at
|
||
# Call the shared agent creation and response generation logic
|
||
return await create_agent_and_generate_response(config)
|
||
|
||
except Exception as e:
|
||
import traceback
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in chat_completions: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||
|
||
|
||
@router.post("/api/v1/chat/warmup")
|
||
async def chat_warmup_v1(request: ChatRequest, authorization: Optional[str] = Header(None)):
|
||
"""
|
||
Warmup endpoint - initializes the agent without processing messages for faster subsequent requests.
|
||
|
||
Args:
|
||
request: ChatRequest containing configuration (messages will be ignored for warmup)
|
||
authorization: Authorization header containing API key (Bearer <API_KEY>)
|
||
|
||
Returns:
|
||
JSON response with warmup status and cache key
|
||
|
||
Required Parameters:
|
||
- bot_id: str - target robot ID
|
||
|
||
Notes:
|
||
- this endpoint pre-generates and caches the agent so later chat requests can reuse it
|
||
- the messages parameter is not processed during warmup and is used only for configuration validation
|
||
- the warmed-up agent generates a unique cache key based on the provided configuration parameters
|
||
"""
|
||
try:
|
||
# v1 endpoint: extract the API key from the Authorization header as the model API key
|
||
api_key = extract_api_key_from_auth(authorization)
|
||
|
||
# Get bot_id (required parameter)
|
||
bot_id = request.bot_id
|
||
if not bot_id:
|
||
raise HTTPException(status_code=400, detail="bot_id is required")
|
||
|
||
# Create project directory (if dataset_ids exist and the type is not agent)
|
||
project_dir = create_project_directory(request.dataset_ids, bot_id, request.skills)
|
||
|
||
# Collect extra parameters as generate_cfg
|
||
exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'shell_env'}
|
||
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
|
||
|
||
# Create an empty message list for warmup (actual messages are not processed during warmup)
|
||
empty_messages = []
|
||
|
||
# Process messages (even if empty)
|
||
messages = process_messages(empty_messages, request.language or "ja")
|
||
|
||
# Create AgentConfig object
|
||
config = await AgentConfig.from_v1_request(request, api_key, project_dir, generate_cfg, messages)
|
||
|
||
# Warm up the mcp_tools cache
|
||
logger.info(f"Warming up mcp_tools for bot_id: {bot_id}")
|
||
from agent.deep_assistant import get_tools_from_mcp
|
||
from agent.prompt_loader import load_mcp_settings_async
|
||
|
||
# Load mcp_settings
|
||
final_mcp_settings = await load_mcp_settings_async(config)
|
||
mcp_settings = final_mcp_settings if final_mcp_settings else []
|
||
if not isinstance(mcp_settings, list) or len(mcp_settings) == 0:
|
||
mcp_settings = []
|
||
|
||
# Warm up mcp_tools (cache logic is already built into get_tools_from_mcp)
|
||
mcp_tools = await get_tools_from_mcp(mcp_settings)
|
||
|
||
return {
|
||
"status": "warmed_up",
|
||
"bot_id": bot_id,
|
||
"mcp_tools_count": len(mcp_tools),
|
||
"message": "MCP tools have been cached successfully"
|
||
}
|
||
|
||
except Exception as e:
|
||
import traceback
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in chat_warmup_v1: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Warmup failed: {str(e)}")
|
||
|
||
|
||
@router.post("/api/v2/chat/warmup")
|
||
async def chat_warmup_v2(request: ChatRequestV2, authorization: Optional[str] = Header(None)):
|
||
"""
|
||
Warmup endpoint v2 - initializes the agent without processing messages for faster subsequent requests.
|
||
Uses the same authentication and configuration retrieval flow as /api/v2/chat/completions.
|
||
|
||
Args:
|
||
request: ChatRequestV2 containing essential parameters (messages will be ignored for warmup)
|
||
authorization: Authorization header for authentication (same as v2 chat endpoint)
|
||
|
||
Returns:
|
||
JSON response with warmup status and cache key
|
||
|
||
Required Parameters:
|
||
- bot_id: str - target robot ID
|
||
|
||
Authentication:
|
||
- Requires valid MD5 hash token: MD5(MASTERKEY:bot_id)
|
||
- Authorization header should contain: Bearer {token}
|
||
|
||
Notes:
|
||
- this endpoint pre-generates and caches the agent so later chat requests can reuse it
|
||
- the messages parameter is not processed during warmup and is used only for configuration validation
|
||
- the warmed-up agent generates a unique cache key based on the full configuration fetched from the backend
|
||
"""
|
||
try:
|
||
# Get bot_id (required parameter)
|
||
bot_id = request.bot_id
|
||
if not bot_id:
|
||
raise HTTPException(status_code=400, detail="bot_id is required")
|
||
|
||
# v2 endpoint authentication validation (same auth logic as chat_completions_v2)
|
||
expected_token = generate_v2_auth_token(bot_id)
|
||
provided_token = extract_api_key_from_auth(authorization)
|
||
|
||
if not provided_token:
|
||
raise HTTPException(
|
||
status_code=401,
|
||
detail="Authorization header is required for v2 API"
|
||
)
|
||
|
||
if provided_token != expected_token:
|
||
raise HTTPException(
|
||
status_code=403,
|
||
detail=f"Invalid authentication token. Expected: {expected_token[:8]}..., Provided: {provided_token[:8]}..."
|
||
)
|
||
|
||
# Fetch robot configuration from the backend API (using the v2 auth method)
|
||
bot_config = await fetch_bot_config(bot_id)
|
||
# Create project directory (using dataset_ids and skills from backend configuration)
|
||
project_dir = create_project_directory(
|
||
bot_config.get("dataset_ids", []),
|
||
bot_id,
|
||
bot_config.get("skills")
|
||
)
|
||
|
||
# Create an empty message list for warmup (actual messages are not processed during warmup)
|
||
empty_messages = []
|
||
|
||
# Process messages
|
||
messages = process_messages(empty_messages, request.language or "ja")
|
||
|
||
exclude_fields = {'messages', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings', 'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'model', 'model_server', 'api_key', 'shell_env', 'max_tokens'}
|
||
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
|
||
logger.info("chat_warmup_v2 generate_cfg_keys=%s requested_model=%s", list(generate_cfg.keys()), request.model)
|
||
# Extract model/model_server/api_key from the request, with higher priority than bot_config (excluding "whatever" and empty values)
|
||
req_data = request.model_dump()
|
||
req_model = req_data.get("model") or ""
|
||
req_model_server = req_data.get("model_server") or ""
|
||
req_api_key = req_data.get("api_key") or ""
|
||
model_name = req_model if req_model and req_model != "whatever" else None
|
||
model_server = req_model_server if req_model_server and req_model_server != "whatever" else None
|
||
api_key = req_api_key if req_api_key and req_api_key != "whatever" else None
|
||
|
||
# Create AgentConfig object
|
||
config = await AgentConfig.from_v2_request(request, bot_config, project_dir, messages, generate_cfg, model_name=model_name, model_server=model_server, api_key=api_key)
|
||
|
||
# Warm up the mcp_tools cache
|
||
logger.info(f"Warming up mcp_tools for bot_id: {bot_id}")
|
||
from agent.deep_assistant import get_tools_from_mcp
|
||
from agent.prompt_loader import load_mcp_settings_async
|
||
|
||
# Load mcp_settings
|
||
final_mcp_settings = await load_mcp_settings_async(config)
|
||
mcp_settings = final_mcp_settings if final_mcp_settings else []
|
||
if not isinstance(mcp_settings, list) or len(mcp_settings) == 0:
|
||
mcp_settings = []
|
||
|
||
# Warm up mcp_tools (cache logic is already built into get_tools_from_mcp)
|
||
mcp_tools = await get_tools_from_mcp(mcp_settings)
|
||
|
||
return {
|
||
"status": "warmed_up",
|
||
"bot_id": bot_id,
|
||
"mcp_tools_count": len(mcp_tools),
|
||
"message": "MCP tools have been cached successfully"
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
import traceback
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in chat_warmup_v2: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Warmup failed: {str(e)}")
|
||
|
||
|
||
@router.post("/api/v2/chat/completions")
|
||
async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[str] = Header(None)):
|
||
"""
|
||
Chat completions API v2 with simplified parameters.
|
||
Only requires messages, stream, tool_response, bot_id, and language parameters.
|
||
Other parameters are fetched from the backend bot configuration API.
|
||
|
||
Args:
|
||
request: ChatRequestV2 containing only essential parameters
|
||
authorization: Authorization header for authentication (different from v1)
|
||
|
||
Returns:
|
||
Union[ChatResponse, StreamingResponse]: Chat completion response or stream
|
||
|
||
Required Parameters:
|
||
- bot_id: str - target robot ID
|
||
- messages: List[Message] - conversation message list
|
||
|
||
Optional Parameters:
|
||
- stream: bool - whether to stream output, default false
|
||
- tool_response: bool - whether to include tool responses, default false
|
||
- language: str - response language, default "ja"
|
||
|
||
Authentication:
|
||
- Requires valid MD5 hash token: MD5(MASTERKEY:bot_id)
|
||
- Authorization header should contain: Bearer {token}
|
||
- Uses MD5 hash of MASTERKEY:bot_id for backend API authentication
|
||
- Optionally uses API key from bot config for model access
|
||
"""
|
||
request_started_at = time.monotonic()
|
||
try:
|
||
# Get bot_id (required parameter)
|
||
bot_id = request.bot_id
|
||
if not bot_id:
|
||
raise HTTPException(status_code=400, detail="bot_id is required")
|
||
|
||
# v2 endpoint authentication validation
|
||
expected_token = generate_v2_auth_token(bot_id)
|
||
provided_token = extract_api_key_from_auth(authorization)
|
||
|
||
if not provided_token:
|
||
raise HTTPException(
|
||
status_code=401,
|
||
detail="Authorization header is required for v2 API"
|
||
)
|
||
|
||
if provided_token != expected_token:
|
||
raise HTTPException(
|
||
status_code=403,
|
||
detail=f"Invalid authentication token. Expected: {expected_token[:8]}..., Provided: {provided_token[:8]}..."
|
||
)
|
||
|
||
# Fetch robot configuration from the backend API (using the v2 auth method)
|
||
bot_config = await fetch_bot_config(bot_id)
|
||
# Create project directory (using dataset_ids and skills from backend configuration)
|
||
project_dir = create_project_directory(
|
||
bot_config.get("dataset_ids", []),
|
||
bot_id,
|
||
bot_config.get("skills")
|
||
)
|
||
# Process messages
|
||
messages = process_messages(request.messages, request.language)
|
||
# Collect extra parameters as generate_cfg
|
||
exclude_fields = {'messages', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings', 'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'model', 'model_server', 'api_key', 'shell_env', 'max_tokens'}
|
||
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
|
||
logger.info("chat_completions_v2 generate_cfg_keys=%s requested_model=%s", list(generate_cfg.keys()), request.model)
|
||
# Extract model/model_server/api_key from the request, with higher priority than bot_config (excluding "whatever" and empty values)
|
||
req_data = request.model_dump()
|
||
req_model = req_data.get("model") or ""
|
||
req_model_server = req_data.get("model_server") or ""
|
||
req_api_key = req_data.get("api_key") or ""
|
||
model_name = req_model if req_model and req_model != "whatever" else None
|
||
model_server = req_model_server if req_model_server and req_model_server != "whatever" else None
|
||
api_key = req_api_key if req_api_key and req_api_key != "whatever" else None
|
||
# Create AgentConfig object
|
||
config = await AgentConfig.from_v2_request(request, bot_config, project_dir, messages, generate_cfg, model_name=model_name, model_server=model_server, api_key=api_key)
|
||
config.request_started_at = request_started_at
|
||
# Call the shared agent creation and response generation logic
|
||
return await create_agent_and_generate_response(config)
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
import traceback
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in chat_completions_v2: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||
|
||
@router.post("/api/v1/chat/cancel")
|
||
async def cancel_chat(session_id: str = Body(..., embed=True)):
|
||
"""
|
||
Cancel an ongoing agent inference.
|
||
|
||
Request body: {"session_id": "xxxxx"}
|
||
Response: {"success": true/false, "message": "..."}
|
||
"""
|
||
from utils.cancel_manager import trigger_cancel
|
||
|
||
if not session_id:
|
||
raise HTTPException(status_code=400, detail="session_id is required")
|
||
|
||
found = trigger_cancel(session_id)
|
||
if found:
|
||
return {"success": True, "message": f"Cancel signal sent for session_id={session_id}"}
|
||
else:
|
||
return {"success": False, "message": f"No active inference found for session_id={session_id}"}
|
||
|
||
|
||
@router.post("/api/v3/chat/completions")
|
||
async def chat_completions_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
|
||
"""
|
||
Chat completions API v3 - 从数据库读取配置
|
||
|
||
与 v2 相比,v3 从本地数据库读取所有配置参数,而不是从后端 API。
|
||
前端只需要传递 bot_id 和 messages,其他配置从数据库自动读取。
|
||
|
||
Args:
|
||
request: ChatRequestV3 包含 bot_id, messages, stream, session_id
|
||
authorization: 可选的认证头
|
||
|
||
Returns:
|
||
Union[ChatResponse, StreamingResponse]: Chat completion response or stream
|
||
|
||
Required Parameters:
|
||
- bot_id: str - 目标机器人ID(用户创建时填写的ID)
|
||
- messages: List[Message] - 对话消息列表
|
||
|
||
Optional Parameters:
|
||
- stream: bool - 是否流式输出,默认false
|
||
- session_id: str - 会话ID,用于保存聊天历史
|
||
|
||
Configuration (from database):
|
||
- model: 模型名称
|
||
- api_key: API密钥
|
||
- model_server: 模型服务器地址
|
||
- language: 回复语言
|
||
- tool_response: 是否包含工具响应
|
||
- system_prompt: 系统提示词
|
||
- dataset_ids: 数据集ID列表
|
||
- mcp_settings: MCP服务器配置
|
||
- user_identifier: 用户标识符
|
||
|
||
Authentication:
|
||
- 可选的 Authorization header(如果需要验证)
|
||
"""
|
||
try:
|
||
# 获取bot_id(必需参数)
|
||
bot_id = request.bot_id
|
||
if not bot_id:
|
||
raise HTTPException(status_code=400, detail="bot_id is required")
|
||
|
||
# 可选的鉴权验证(如果传递了 authorization header)
|
||
if authorization:
|
||
expected_token = generate_v2_auth_token(bot_id)
|
||
provided_token = extract_api_key_from_auth(authorization)
|
||
if provided_token and provided_token != expected_token:
|
||
logger.warning(f"Invalid auth token provided for v3 API, but continuing anyway")
|
||
|
||
# 从数据库获取机器人配置
|
||
bot_config = await fetch_bot_config_from_db(bot_id, request.user_identifier)
|
||
|
||
# 构造类 v2 的请求格式
|
||
# 从数据库配置中提取参数
|
||
language = bot_config.get("language", "zh")
|
||
# 创建项目目录(从数据库配置获取)
|
||
project_dir = create_project_directory(
|
||
bot_config.get("dataset_ids", []),
|
||
bot_id,
|
||
bot_config.get("skills", [])
|
||
)
|
||
|
||
# 处理消息
|
||
messages = process_messages(request.messages, language)
|
||
|
||
# 创建 AgentConfig 对象
|
||
# 需要构造一个兼容 v2 的配置对象
|
||
config = await AgentConfig.from_v3_request(
|
||
request,
|
||
bot_config,
|
||
project_dir,
|
||
messages,
|
||
language
|
||
)
|
||
|
||
# 调用公共的agent创建和响应生成逻辑
|
||
return await create_agent_and_generate_response(config)
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
import traceback
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in chat_completions_v3: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||
|
||
|
||
async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str] = None):
|
||
"""Build a direct LLM client from a bot's database config.
|
||
|
||
Reuses the v3 config-loading chain to resolve model / api_key / model_server,
|
||
then constructs a LangChain chat model without any agent logic.
|
||
|
||
Returns:
|
||
tuple: (llm_instance, model_name)
|
||
"""
|
||
bot_config = await fetch_bot_config_from_db(bot_id, user_identifier)
|
||
|
||
model_name = bot_config.get("model", "")
|
||
api_key = bot_config.get("api_key", "")
|
||
model_server = bot_config.get("model_server", "")
|
||
|
||
if not model_name:
|
||
raise HTTPException(status_code=400, detail=f"No model configured for bot '{bot_id}'")
|
||
|
||
# Detect provider and sanitize kwargs (same as the agent path)
|
||
model_provider, base_url = detect_provider(model_name, model_server)
|
||
model_kwargs, _, _ = sanitize_model_kwargs(
|
||
model_name=model_name,
|
||
model_provider=model_provider,
|
||
base_url=base_url,
|
||
api_key=api_key,
|
||
generate_cfg={},
|
||
source="llm_passthrough"
|
||
)
|
||
|
||
llm = init_chat_model(**model_kwargs)
|
||
return llm, model_name
|
||
|
||
|
||
@router.post("/api/v3/llm/chat/completions")
|
||
async def llm_passthrough_v3(request: LLMPassthroughRequest, authorization: Optional[str] = Header(None)):
|
||
"""LLM passthrough API - direct LLM call, bypassing all agent logic.
|
||
|
||
Only model / api_key / model_server are read from the bot's database config
|
||
(resolved via bot_id). Messages are forwarded to the LLM as-is.
|
||
|
||
Supports vision/multimodal input: a message's content can be a plain string
|
||
or a list of OpenAI-style content parts (text + image_url). Whether images are
|
||
actually recognized depends on the configured model being vision-capable.
|
||
|
||
Required Parameters:
|
||
- bot_id: str - target bot id (used to look up LLM config from db)
|
||
- messages: List[VisionMessage] - conversation messages, passed through directly
|
||
|
||
Optional Parameters:
|
||
- stream: bool - whether to stream the output, default false
|
||
- user_identifier: str - used to resolve the api_key owner
|
||
|
||
Authentication:
|
||
- Authorization header is required: Bearer <token>
|
||
- token = md5(MASTERKEY:bot_id), same scheme as the v2 API
|
||
|
||
Returns:
|
||
Union[dict, StreamingResponse]: OpenAI-compatible completion or stream
|
||
"""
|
||
try:
|
||
bot_id = request.bot_id
|
||
if not bot_id:
|
||
raise HTTPException(status_code=400, detail="bot_id is required")
|
||
|
||
# Authentication validation (same auth logic as v2: token = md5(MASTERKEY:bot_id))
|
||
expected_token = generate_v2_auth_token(bot_id)
|
||
provided_token = extract_api_key_from_auth(authorization)
|
||
|
||
if not provided_token:
|
||
raise HTTPException(
|
||
status_code=401,
|
||
detail="Authorization header is required"
|
||
)
|
||
|
||
if provided_token != expected_token:
|
||
raise HTTPException(
|
||
status_code=403,
|
||
detail=f"Invalid authentication token. Expected: {expected_token[:8]}..., Provided: {provided_token[:8]}..."
|
||
)
|
||
|
||
# Build the LLM client from db config
|
||
llm, model_name = await build_llm_from_bot_config(bot_id, request.user_identifier)
|
||
|
||
# Forward messages as-is (pure passthrough, no agent processing)
|
||
lc_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
|
||
|
||
chunk_id = f"chatcmpl-{int(time.time())}"
|
||
|
||
# Streaming response
|
||
if request.stream:
|
||
async def generate():
|
||
try:
|
||
async for chunk in llm.astream(lc_messages):
|
||
content = chunk.content if isinstance(chunk.content, str) else str(chunk.content)
|
||
if content:
|
||
data = create_stream_chunk(chunk_id, model_name, content=content)
|
||
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
|
||
# Final chunk with finish_reason
|
||
done = create_stream_chunk(chunk_id, model_name, finish_reason="stop")
|
||
yield f"data: {json.dumps(done, ensure_ascii=False)}\n\n"
|
||
yield "data: [DONE]\n\n"
|
||
except Exception as stream_error:
|
||
logger.error(f"Error in LLM passthrough stream: {stream_error}")
|
||
err = {"error": {"message": str(stream_error), "type": "internal_error"}}
|
||
yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"
|
||
|
||
return StreamingResponse(generate(), media_type="text/event-stream")
|
||
|
||
# Non-streaming response
|
||
response = await llm.ainvoke(lc_messages)
|
||
content = response.content if isinstance(response.content, str) else str(response.content)
|
||
|
||
return {
|
||
"id": chunk_id,
|
||
"object": "chat.completion",
|
||
"created": int(time.time()),
|
||
"model": model_name,
|
||
"choices": [{
|
||
"index": 0,
|
||
"message": {"role": "assistant", "content": content},
|
||
"finish_reason": "stop"
|
||
}]
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in llm_passthrough_v3: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||
|
||
|
||
# ============================================================================
|
||
# Chat history query endpoints
|
||
# ============================================================================
|
||
|
||
@router.get("/api/v1/chat/history", response_model=dict)
|
||
async def get_chat_history(
|
||
session_id: str,
|
||
last_message_id: Optional[str] = None,
|
||
limit: int = 20
|
||
):
|
||
"""
|
||
Get chat history records.
|
||
|
||
Query from the dedicated chat history table and return full original messages (not affected by checkpoint summary).
|
||
|
||
Parameters:
|
||
session_id: Session ID
|
||
last_message_id: ID of the last message from the previous page, used to fetch older messages
|
||
limit: Number of messages returned per request, default 20, maximum 100
|
||
|
||
Returns:
|
||
{
|
||
"messages": [
|
||
{
|
||
"id": "unique message ID",
|
||
"role": "user or assistant",
|
||
"content": "message content",
|
||
"timestamp": "ISO 8601 formatted timestamp"
|
||
},
|
||
...
|
||
],
|
||
"has_more": true/false // whether more history messages are available
|
||
}
|
||
"""
|
||
try:
|
||
from agent.chat_history_manager import get_chat_history_manager
|
||
|
||
# Parameter validation
|
||
limit = min(max(1, limit), 100)
|
||
|
||
manager = get_chat_history_manager()
|
||
result = await manager.manager.get_history_by_message_id(
|
||
session_id=session_id,
|
||
last_message_id=last_message_id,
|
||
limit=limit
|
||
)
|
||
|
||
return {
|
||
"messages": result["messages"],
|
||
"has_more": result["has_more"]
|
||
}
|
||
|
||
except Exception as e:
|
||
import traceback
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in get_chat_history: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||
|
||
|
||
@router.post("/api/v1/chat/history/batch", response_model=BatchSaveChatResponse)
|
||
async def batch_save_chat_history(request: BatchSaveChatRequest):
|
||
"""
|
||
Save chat history in batch.
|
||
|
||
Supports custom batch saving of multiple chat messages to the database.
|
||
|
||
Parameters:
|
||
session_id: Session ID
|
||
messages: List of messages to save, each containing role and content
|
||
bot_id: Robot ID (optional)
|
||
|
||
Request body example:
|
||
{
|
||
"session_id": "test-session-123",
|
||
"messages": [
|
||
{"role": "user", "content": "你好"},
|
||
{"role": "assistant", "content": "你好!有什么可以帮助你的吗?"},
|
||
{"role": "user", "content": "咖啡多少钱一杯"}
|
||
],
|
||
"bot_id": "63069654-7750-409d-9a58-a0960d899a20"
|
||
}
|
||
|
||
Returns:
|
||
{
|
||
"success": true,
|
||
"message": "Successfully saved 3 messages",
|
||
"session_id": "test-session-123",
|
||
"saved_count": 3,
|
||
"message_ids": ["uuid1", "uuid2", "uuid3"]
|
||
}
|
||
"""
|
||
try:
|
||
from agent.chat_history_manager import get_chat_history_manager
|
||
|
||
# Parameter validation
|
||
if not request.session_id:
|
||
raise HTTPException(status_code=400, detail="session_id is required")
|
||
|
||
if not request.messages or len(request.messages) == 0:
|
||
raise HTTPException(status_code=400, detail="messages list is empty")
|
||
|
||
# Convert message format
|
||
messages_dict = [
|
||
{"role": msg.role, "content": msg.content}
|
||
for msg in request.messages
|
||
]
|
||
|
||
manager = get_chat_history_manager()
|
||
message_ids = await manager.manager.save_messages(
|
||
session_id=request.session_id,
|
||
messages=messages_dict,
|
||
bot_id=request.bot_id
|
||
)
|
||
|
||
# Filter out None values
|
||
valid_message_ids = [mid for mid in message_ids if mid is not None]
|
||
saved_count = len(valid_message_ids)
|
||
|
||
return BatchSaveChatResponse(
|
||
success=True,
|
||
message=f"Successfully saved {saved_count} messages",
|
||
session_id=request.session_id,
|
||
saved_count=saved_count,
|
||
message_ids=valid_message_ids
|
||
)
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
import traceback
|
||
error_details = traceback.format_exc()
|
||
logger.error(f"Error in batch_save_chat_history: {str(e)}")
|
||
logger.error(f"Full traceback: {error_details}")
|
||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|