透传接口现在支持视觉图片识别

This commit is contained in:
朱潮 2026-06-07 11:02:10 +08:00
parent f45f55b50a
commit 1fc105a732
2 changed files with 37 additions and 4 deletions

View File

@ -13,7 +13,7 @@ logger = logging.getLogger('app')
from utils import ( from utils import (
Message, ChatRequest, ChatResponse, BatchSaveChatRequest, BatchSaveChatResponse Message, ChatRequest, ChatResponse, BatchSaveChatRequest, BatchSaveChatResponse
) )
from utils.api_models import ChatRequestV2, ChatRequestV3 from utils.api_models import ChatRequestV2, ChatRequestV3, LLMPassthroughRequest
from utils.fastapi_utils import ( from utils.fastapi_utils import (
process_messages, process_messages,
create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db, create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
@ -1004,15 +1004,19 @@ async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str]
@router.post("/api/v3/llm/chat/completions") @router.post("/api/v3/llm/chat/completions")
async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)): async def llm_passthrough_v3(request: LLMPassthroughRequest, authorization: Optional[str] = Header(None)):
"""LLM passthrough API - direct LLM call, bypassing all agent logic. """LLM passthrough API - direct LLM call, bypassing all agent logic.
Only model / api_key / model_server are read from the bot's database config Only model / api_key / model_server are read from the bot's database config
(resolved via bot_id). Messages are forwarded to the LLM as-is. (resolved via bot_id). Messages are forwarded to the LLM as-is.
Supports vision/multimodal input: a message's content can be a plain string
or a list of OpenAI-style content parts (text + image_url). Whether images are
actually recognized depends on the configured model being vision-capable.
Required Parameters: Required Parameters:
- bot_id: str - target bot id (used to look up LLM config from db) - bot_id: str - target bot id (used to look up LLM config from db)
- messages: List[Message] - conversation messages, passed through directly - messages: List[VisionMessage] - conversation messages, passed through directly
Optional Parameters: Optional Parameters:
- stream: bool - whether to stream the output, default false - stream: bool - whether to stream the output, default false

View File

@ -3,7 +3,7 @@
API data models and response schemas. API data models and response schemas.
""" """
from typing import Dict, List, Optional, Any, AsyncGenerator from typing import Dict, List, Optional, Any, AsyncGenerator, Union
from pydantic import BaseModel, Field, field_validator, ConfigDict from pydantic import BaseModel, Field, field_validator, ConfigDict
class Message(BaseModel): class Message(BaseModel):
@ -89,6 +89,35 @@ class ChatRequestV3(BaseModel):
user_identifier: Optional[str] = None user_identifier: Optional[str] = None
class VisionMessage(BaseModel):
    """A single chat message whose content may be multimodal.

    ``content`` takes one of two shapes:

    * a plain string, for text-only messages; or
    * a list of OpenAI-style content parts, for example::

        [
            {"type": "text", "text": "what is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://... or data:image/png;base64,..."}}
        ]

    Each part is typed only as ``Dict[str, Any]``, so this model does not
    check the inner structure of a part (its ``type`` key, URL format, etc.).
    """

    # Speaker of the message; any string is accepted by this model.
    role: str
    # Either raw text or a list of content-part dictionaries (see docstring).
    content: Union[str, List[Dict[str, Any]]]
class LLMPassthroughRequest(BaseModel):
    """Request body for the LLM passthrough endpoint, with vision support.

    The caller must supply ``bot_id`` and ``messages``; everything else is
    optional. The LLM connection settings (model / api_key / model_server)
    are not carried in this payload - they are resolved from the database
    via ``bot_id``.
    """

    # Fields not declared below are accepted instead of being rejected.
    model_config = ConfigDict(extra='allow')

    # Conversation history; each message may hold text or multimodal parts.
    messages: List[VisionMessage]
    # Identifier of the bot whose stored LLM configuration should be used.
    bot_id: str
    # Whether the response should be streamed; defaults to non-streaming.
    stream: Optional[bool] = False
    # Optional identifier of the end user issuing the request.
    user_identifier: Optional[str] = None
class FileProcessRequest(BaseModel): class FileProcessRequest(BaseModel):
unique_id: str unique_id: str
files: Optional[Dict[str, List[str]]] = Field(default=None, description="Files organized by key groups. Each key maps to a list of file paths (supports zip files)") files: Optional[Dict[str, List[str]]] = Field(default=None, description="Files organized by key groups. Each key maps to a list of file paths (supports zip files)")