透传接口现在支持视觉图片识别

This commit is contained in:
朱潮 2026-06-07 11:02:10 +08:00
parent f45f55b50a
commit 1fc105a732
2 changed files with 37 additions and 4 deletions

View File

@ -13,7 +13,7 @@ logger = logging.getLogger('app')
from utils import (
Message, ChatRequest, ChatResponse, BatchSaveChatRequest, BatchSaveChatResponse
)
from utils.api_models import ChatRequestV2, ChatRequestV3
from utils.api_models import ChatRequestV2, ChatRequestV3, LLMPassthroughRequest
from utils.fastapi_utils import (
process_messages,
create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
@ -1004,15 +1004,19 @@ async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str]
@router.post("/api/v3/llm/chat/completions")
async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
async def llm_passthrough_v3(request: LLMPassthroughRequest, authorization: Optional[str] = Header(None)):
"""LLM passthrough API - direct LLM call, bypassing all agent logic.
Only model / api_key / model_server are read from the bot's database config
(resolved via bot_id). Messages are forwarded to the LLM as-is.
Supports vision/multimodal input: a message's content can be a plain string
or a list of OpenAI-style content parts (text + image_url). Whether images are
actually recognized depends on the configured model being vision-capable.
Required Parameters:
- bot_id: str - target bot id (used to look up LLM config from db)
- messages: List[Message] - conversation messages, passed through directly
- messages: List[VisionMessage] - conversation messages, passed through directly
Optional Parameters:
- stream: bool - whether to stream the output, default false

View File

@ -3,7 +3,7 @@
API data models and response schemas.
"""
from typing import Dict, List, Optional, Any, AsyncGenerator
from typing import Dict, List, Optional, Any, AsyncGenerator, Union
from pydantic import BaseModel, Field, field_validator, ConfigDict
class Message(BaseModel):
@ -89,6 +89,35 @@ class ChatRequestV3(BaseModel):
user_identifier: Optional[str] = None
class VisionMessage(BaseModel):
    """A single chat message whose body may be text-only or multimodal.

    ``content`` is accepted in one of two shapes:

    * a plain string, for a text-only message; or
    * a list of OpenAI-style content-part dicts mixing text and images, e.g.::

        [
            {"type": "text", "text": "what is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://... or data:image/png;base64,..."}}
        ]

    The model only requires each part to be a dict with string keys; the
    part structure itself is not checked here.
    """

    # Speaker role of the message; any string is accepted by this model.
    role: str
    # Either raw text or a list of content-part dicts (see class docstring).
    content: Union[str, List[Dict[str, Any]]]
class LLMPassthroughRequest(BaseModel):
    """Request body for the LLM passthrough endpoint, with vision support.

    ``bot_id`` and ``messages`` are the only required fields. The LLM
    configuration (model / api_key / model_server) is not part of the
    request; it is resolved from the database via ``bot_id``.
    """

    # Unknown request fields are accepted and kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    # Conversation messages; each may carry plain text or multimodal parts.
    messages: List[VisionMessage]
    # Identifier of the bot whose stored LLM configuration should be used.
    bot_id: str
    # Whether the response should be streamed; defaults to non-streaming.
    stream: Optional[bool] = False
    # Optional identifier of the calling user.
    user_identifier: Optional[str] = None
class FileProcessRequest(BaseModel):
unique_id: str
files: Optional[Dict[str, List[str]]] = Field(default=None, description="Files organized by key groups. Each key maps to a list of file paths (supports zip files)")