qwen_agent/skills/linggan/agnes-image/scripts/understand_image.py
2026-06-14 08:16:00 +08:00

127 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "requests>=2.31.0",
# ]
# ///
"""使用 Agnes AI 的多模态模型理解图片。
传入图片(公网 URL 或本地文件)+ 文本问题,模型基于图片进行描述、分析、问答或信息提取。
"""
import argparse
import base64
import json
import mimetypes
import os
import sys
import time
import requests
# Chat-completions endpoint that call_api() sends its POST requests to.
API_URL = "https://apihub.agnes-ai.com/v1/chat/completions"
# Model ID used as the default value of the --model command-line option.
DEFAULT_MODEL = "agnes-2.0-flash"
def file_to_data_uri(path):
"""把本地图片文件转成 data:image Base64。"""
mime = mimetypes.guess_type(path)[0] or "image/png"
with open(path, "rb") as f:
b64 = base64.b64encode(f.read()).decode()
return f"data:{mime};base64,{b64}"
def call_api(messages, model, api_key, temperature=None, max_tokens=None, retries=3):
    """POST a chat-completions request to the Agnes API and return the parsed JSON.

    SSL errors, connection errors and timeouts are treated as transient and
    retried with a growing pause (2 s after the first attempt, 4 s after the
    second, and so on). Any other failure is raised immediately.

    Args:
        messages: List of chat messages to send as the ``messages`` field.
        model: Model ID to send as the ``model`` field.
        api_key: Key placed in the ``Authorization: Bearer`` header.
        temperature: Optional sampling temperature; omitted from the payload
            when ``None``.
        max_tokens: Optional generation limit; omitted from the payload when
            ``None``.
        retries: Total number of attempts. Values below 1 are treated as 1 so
            that the request is always sent at least once.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by ``raise_for_status``).
        requests.exceptions.RequestException: If the last attempt still fails
            with a transient error, or on any non-transient request error.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {"model": model, "messages": messages}
    if temperature is not None:
        payload["temperature"] = temperature
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens

    transient_errors = (
        requests.exceptions.SSLError,
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
    )
    # Guarantee one attempt: with retries < 1 the loop would otherwise be
    # skipped and there would be neither a response nor an error to report.
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            resp = requests.post(API_URL, headers=headers, json=payload, timeout=180)
        except transient_errors:
            if attempt == attempts:
                # Out of attempts: surface the last transient error as-is.
                raise
            time.sleep(attempt * 2)
            continue
        resp.raise_for_status()
        return resp.json()
def _build_arg_parser():
    """Create the parser describing this script's command-line options."""
    parser = argparse.ArgumentParser(description="使用 Agnes AI 理解/分析图片。")
    parser.add_argument("--prompt", required=True, help="对图片的问题或指令(必选)")
    parser.add_argument("--image", action="append", default=None,
                        help="图片公网 URL 或 data:image Base64可多次指定")
    parser.add_argument("--image-file", action="append", default=None,
                        help="本地图片文件路径,自动转 Base64可多次指定")
    parser.add_argument("--system", help="可选的系统提示system 消息)")
    parser.add_argument("--model", default=DEFAULT_MODEL, help=f"模型 ID默认 {DEFAULT_MODEL}")
    parser.add_argument("--temperature", type=float, help="采样温度0~1越低越确定")
    parser.add_argument("--max-tokens", type=int, help="最多生成的 token 数")
    parser.add_argument("--api-key", help="Agnes API Key也可用 AGNES_API_KEY 环境变量)")
    return parser


def _collect_image_urls(args):
    """Return all image references: ``--image`` values first, then local files.

    Each ``--image-file`` path is converted to a data URI. Prints an error and
    exits with status 1 when a path is not an existing file or cannot be read.
    """
    image_urls = list(args.image or [])
    for path in args.image_file or []:
        if not os.path.isfile(path):
            print(f"ERROR: 本地图片不存在: {path}")
            sys.exit(1)
        try:
            image_urls.append(file_to_data_uri(path))
        except OSError as err:
            # The file exists but could not be read (e.g. permission denied);
            # report it like every other failure instead of a traceback.
            print(f"ERROR: 无法读取本地图片: {err}")
            sys.exit(1)
    return image_urls


def _build_messages(prompt, system, image_urls):
    """Assemble the chat message list from the prompt, system text and images."""
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    if image_urls:
        # Multimodal content: the text part followed by one part per image.
        user_content = [{"type": "text", "text": prompt}]
        user_content.extend(
            {"type": "image_url", "image_url": {"url": url}} for url in image_urls
        )
    else:
        # Without images, fall back to a plain string as the message content.
        user_content = prompt
    messages.append({"role": "user", "content": user_content})
    return messages


def main():
    """Command-line entry point: ask the model about the given images.

    Parses the arguments, resolves the API key (``--api-key`` first, then the
    ``AGNES_API_KEY`` environment variable), sends the request through
    ``call_api`` and prints the answer text. Every failure path prints a line
    starting with ``ERROR:`` and exits with status 1.
    """
    args = _build_arg_parser().parse_args()

    api_key = args.api_key or os.environ.get("AGNES_API_KEY")
    if not api_key:
        print("ERROR: 缺少 API Key请用 --api-key 或设置 AGNES_API_KEY 环境变量。")
        sys.exit(1)

    image_urls = _collect_image_urls(args)
    messages = _build_messages(args.prompt, args.system, image_urls)

    try:
        result = call_api(
            messages=messages,
            model=args.model,
            api_key=api_key,
            temperature=args.temperature,
            max_tokens=args.max_tokens,
        )
    except requests.exceptions.RequestException as e:
        print(f"ERROR: API 请求失败: {e}")
        # HTTP errors carry the server's response; show the start of its body.
        if getattr(e, "response", None) is not None:
            print(f"Response body: {e.response.text[:500]}")
        sys.exit(1)

    try:
        answer = result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        print(f"ERROR: 无法解析响应。完整响应: {json.dumps(result, ensure_ascii=False)[:500]}")
        sys.exit(1)
    print(answer)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()