透传接口现在支持视觉图片识别
This commit is contained in:
parent
f45f55b50a
commit
1fc105a732
@ -13,7 +13,7 @@ logger = logging.getLogger('app')
|
||||
from utils import (
|
||||
Message, ChatRequest, ChatResponse, BatchSaveChatRequest, BatchSaveChatResponse
|
||||
)
|
||||
from utils.api_models import ChatRequestV2, ChatRequestV3
|
||||
from utils.api_models import ChatRequestV2, ChatRequestV3, LLMPassthroughRequest
|
||||
from utils.fastapi_utils import (
|
||||
process_messages,
|
||||
create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config, fetch_bot_config_from_db,
|
||||
@ -1004,15 +1004,19 @@ async def build_llm_from_bot_config(bot_id: str, user_identifier: Optional[str]
|
||||
|
||||
|
||||
@router.post("/api/v3/llm/chat/completions")
|
||||
async def llm_passthrough_v3(request: ChatRequestV3, authorization: Optional[str] = Header(None)):
|
||||
async def llm_passthrough_v3(request: LLMPassthroughRequest, authorization: Optional[str] = Header(None)):
|
||||
"""LLM passthrough API - direct LLM call, bypassing all agent logic.
|
||||
|
||||
Only model / api_key / model_server are read from the bot's database config
|
||||
(resolved via bot_id). Messages are forwarded to the LLM as-is.
|
||||
|
||||
Supports vision/multimodal input: a message's content can be a plain string
|
||||
or a list of OpenAI-style content parts (text + image_url). Whether images are
|
||||
actually recognized depends on the configured model being vision-capable.
|
||||
|
||||
Required Parameters:
|
||||
- bot_id: str - target bot id (used to look up LLM config from db)
|
||||
- messages: List[Message] - conversation messages, passed through directly
|
||||
- messages: List[VisionMessage] - conversation messages, passed through directly
|
||||
|
||||
Optional Parameters:
|
||||
- stream: bool - whether to stream the output, default false
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
API data models and response schemas.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional, Any, AsyncGenerator
|
||||
from typing import Dict, List, Optional, Any, AsyncGenerator, Union
|
||||
from pydantic import BaseModel, Field, field_validator, ConfigDict
|
||||
|
||||
class Message(BaseModel):
|
||||
@ -89,6 +89,35 @@ class ChatRequestV3(BaseModel):
|
||||
user_identifier: Optional[str] = None
|
||||
|
||||
|
||||
class VisionMessage(BaseModel):
    """A single chat message whose content may be multimodal.

    ``content`` is accepted in one of two shapes:

    * a plain string, for text-only messages; or
    * a list of OpenAI-style content parts (each part a dict), for
      example::

          [
              {"type": "text", "text": "what is in this image?"},
              {"type": "image_url",
               "image_url": {"url": "https://... or data:image/png;base64,..."}},
          ]

    The individual parts are typed only as ``Dict[str, Any]``; their inner
    structure is not validated by this model.
    """

    # Speaker role of the message; any string is accepted here.
    role: str
    # Either raw text or a list of content-part dicts (text / image_url).
    content: Union[str, List[Dict[str, Any]]]
|
||||
|
||||
|
||||
class LLMPassthroughRequest(BaseModel):
    """Request body for the LLM passthrough endpoint (vision/multimodal aware).

    ``bot_id`` and ``messages`` are the only required fields. The LLM
    configuration (model / api_key / model_server) is not part of this
    payload; it is resolved from the database via ``bot_id``.
    """

    # Conversation messages; each may carry plain text or multimodal parts.
    messages: List[VisionMessage]
    # Identifier of the bot whose LLM configuration should be used.
    bot_id: str
    # Whether the response should be streamed; defaults to non-streaming.
    stream: Optional[bool] = False
    # Optional caller-supplied user identifier.
    user_identifier: Optional[str] = None

    # Fields not declared above are accepted and kept instead of rejected.
    model_config = ConfigDict(extra="allow")
|
||||
|
||||
|
||||
class FileProcessRequest(BaseModel):
|
||||
unique_id: str
|
||||
files: Optional[Dict[str, List[str]]] = Field(default=None, description="Files organized by key groups. Each key maps to a list of file paths (supports zip files)")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user