diff --git a/skills/seedream/SKILL.md b/skills/seedream/SKILL.md new file mode 100755 index 0000000..7a4df40 --- /dev/null +++ b/skills/seedream/SKILL.md @@ -0,0 +1,105 @@ +--- +name: seedream +description: 使用火山引擎 Seedream/Seedance API 生成高质量图片和视频。适用于文生图、图生图、文生视频、图生视频以及生成关联组图的场景。 +--- + +# Seedream + +本 Skill 封装了火山引擎(Volcengine)的 Seedream-4.5 图片生成和 Seedance-1.5 视频生成能力,支持文生图、图生图、文生视频、图生视频。 + +## 使用方法 + +### 文生图 +生成单张图片(默认分辨率): +```bash +python {baseDir}/scripts/generate_image.py --prompt "一只赛博朋克风格的猫" +``` + +指定分辨率(如 2K, 4K 或具体像素): +```bash +python {baseDir}/scripts/generate_image.py --prompt "壮丽的山川日出" --size "2K" +``` + +### 图生图 +提供参考图片 URL: +```bash +python {baseDir}/scripts/generate_image.py --prompt "将其风格变为印象派" --image "https://example.com/input.jpg" +``` + +### 生成组图 +生成一组内容关联的图片(最多 15 张): +```bash +python {baseDir}/scripts/generate_image.py --prompt "一个宇航员在不同行星上的探险经历" --sequential --max-images 4 +``` + +## 参数说明 + +- `--prompt`: (必选) 图像生成的文本描述。 +- `--model`: (可选) 模型 ID,默认为 `doubao-seedream-4-5-251128`。 +- `--size`: (可选) 图像尺寸。支持 `2K`, `4K` 或 `2048x2048` 等格式。 +- `--image`: (可选) 参考图 URL 或 Base64 编码。 +- `--sequential`: (可选) 开启组图生成功能。 +- `--max-images`: (可选) 组图生成的最大图片数量(1-15)。 + +## 工作流 + +1. 调用 `generate_image.py` 脚本。 +2. 脚本会输出以 `MEDIA_URL: ` 开头的图片链接。 +3. 提取链接并使用 Markdown 语法展示:`![Generated Image](URL)`。 +4. 
除非用户要求,否则无需下载图片。 + +## 注意事项 + +- Seedream-4.5 支持中英文提示词。 +- 组图功能仅在 Seedream-4.5/4.0 模型中有效。 +- 确保提供的图片 URL 可公开访问。 + +--- + +## 视频生成 + +### 文生视频 +```bash +python {baseDir}/scripts/generate_video.py --prompt "无人机以极快速度穿越复杂障碍或自然奇观,带来沉浸式飞行体验" +``` + +### 图生视频 +提供参考图片 URL: +```bash +python {baseDir}/scripts/generate_video.py --prompt "让画面中的人物动起来" --image "https://example.com/input.jpg" +``` + +指定视频时长(5 或 10 秒): +```bash +python {baseDir}/scripts/generate_video.py --prompt "日出延时摄影" --duration 10 +``` + +固定摄像机位: +```bash +python {baseDir}/scripts/generate_video.py --prompt "一只猫在桌上跳跃" --camera-fixed +``` + +### 视频参数说明 + +- `--prompt`: (必选) 视频生成的文本描述。 +- `--model`: (可选) 模型 ID,默认为 `doubao-seedance-1-5-pro-251215`。 +- `--image`: (可选) 参考图 URL,用于图生视频。 +- `--duration`: (可选) 视频时长,5 或 10 秒,默认 5。 +- `--camera-fixed`: (可选) 固定摄像机位。 +- `--no-watermark`: (可选) 去除水印。 +- `--poll-interval`: (可选) 轮询间隔秒数,默认 5。 +- `--max-wait`: (可选) 最大等待秒数,默认 600。 + +### 视频生成工作流 + +1. 调用 `generate_video.py` 脚本。 +2. 脚本会自动创建异步任务并轮询结果。 +3. 完成后输出以 `MEDIA_URL: ` 开头的视频链接。 +4. 提取链接并展示给用户(视频链接可直接访问)。 +5. 
# ---------------------------------------------------------------------------
# NOTE(review): SOURCE is a whitespace-collapsed `git diff`. The non-Python
# text that shares these lines with the scripts is preserved verbatim below
# (as comments) so that no content is lost when the code is reformatted.
#
# Tail of skills/seedream/SKILL.md (continues the list item numbered "5."):
#
#   视频生成通常需要 1-3 分钟,请耐心等待。
#
#   ### 视频注意事项
#
#   - Seedance 视频生成是异步任务,脚本会自动轮询等待结果。
#   - 支持中英文提示词。
#   - 图生视频时,确保图片 URL 可公开访问。
# ---------------------------------------------------------------------------


# ===========================================================================
# diff --git a/skills/seedream/scripts/generate_image.py
#            b/skills/seedream/scripts/generate_image.py
# new file mode 100755 (original blob fe8c940)
# ===========================================================================
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "requests>=2.31.0",
# ]
# ///
#
# Generate images with the Volcengine Ark image-generation endpoint and print
# one "MEDIA_URL: <url>" line per generated image on stdout.

import argparse
import json
import os
import sys

IMAGE_API_URL = "https://ark.cn-beijing.volces.com/api/v3/images/generations"

# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# forever on a stalled connection; the read timeout is generous because the
# call is synchronous and only returns once the images are rendered.
IMAGE_REQUEST_TIMEOUT = (10, 300)


def extract_image_urls(result):
    """Split a decoded image-generation response into URLs and problems.

    Args:
        result: The decoded JSON body of the API response.

    Returns:
        A ``(urls, errors)`` tuple. ``urls`` lists every non-empty ``url``
        found in ``result["data"]`` in order; ``errors`` holds one
        human-readable message per data item that carried no URL.
    """
    urls = []
    errors = []
    items = result.get("data") if isinstance(result, dict) else None
    for item in items or []:
        if isinstance(item, dict) and item.get("url"):
            urls.append(item["url"])
        elif isinstance(item, dict) and "b64_json" in item:
            # This skill only hands URLs back to the caller; inline base64
            # payloads are reported instead of being silently dropped.
            errors.append("Received base64 data but expected URL.")
        else:
            errors.append(
                f"No URL in response item: {json.dumps(item, ensure_ascii=False)}"
            )
    return urls, errors


def generate_image(prompt, model, size, api_key, image_input=None, sequential=False, max_images=1):
    """Request one image (or a related group) and print the resulting URLs.

    Args:
        prompt: Text description of the image.
        model: Model ID or endpoint ID.
        size: Size string sent to the API unchanged (e.g. "2K", "2048x2048").
        api_key: Bearer token for the API.
        image_input: Optional reference image (URL or base64) sent as "image".
        sequential: When true, ask for a group of related images.
        max_images: Upper bound on the group size; only sent when
            ``sequential`` is true.

    Exits the process with status 1 when the request fails or when the
    response contains no usable image URL, so callers can rely on the exit
    code instead of having to parse "ERROR:" lines.
    """
    # Imported lazily so `--help`, argument validation and the pure helpers
    # keep working even when the dependency is not installed.
    import requests

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": model,
        "prompt": prompt,
        "size": size,
    }
    if image_input:
        payload["image"] = image_input
    if sequential:
        payload["sequential_image_generation"] = "auto"
        payload["sequential_image_generation_options"] = {"max_images": max_images}
    else:
        payload["sequential_image_generation"] = "disabled"

    try:
        response = requests.post(
            IMAGE_API_URL,
            headers=headers,
            json=payload,
            timeout=IMAGE_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"ERROR: API request failed: {e}")
        if getattr(e, "response", None) is not None:
            print(f"Response body: {e.response.text}")
        sys.exit(1)

    urls, errors = extract_image_urls(result)
    for image_url in urls:
        print(f"MEDIA_URL: {image_url}")
    for message in errors:
        print(f"ERROR: {message}")

    if not urls:
        if not errors:
            print(f"ERROR: No image data in response. Full response: {json.dumps(result)}")
        # Nothing usable was produced: signal failure through the exit code.
        sys.exit(1)


def main():
    """Parse command-line arguments and run a single image generation."""
    parser = argparse.ArgumentParser(description="Generate images using Volcengine Seedream API.")
    parser.add_argument("--prompt", required=True, help="Text prompt for image generation")
    parser.add_argument("--model", default="doubao-seedream-4-5-251128", help="Model ID or Endpoint ID")
    parser.add_argument("--size", default="2048x2048", help="Image size (e.g., 2K, 4K, 2048x2048)")
    parser.add_argument("--api-key", help="Volcengine API Key")
    parser.add_argument("--image", help="Input image URL or base64 (optional)")
    parser.add_argument("--sequential", action="store_true", help="Enable sequential image generation (group)")
    parser.add_argument("--max-images", type=int, default=1, help="Max images for sequential generation (1-15)")

    args = parser.parse_args()

    # The help text and SKILL.md promise 1-15; reject anything else locally
    # instead of spending an API round trip on it.
    if not 1 <= args.max_images <= 15:
        parser.error("--max-images must be between 1 and 15")

    api_key = args.api_key or os.environ.get("VOLC_API_KEY")
    if not api_key:
        print("ERROR: API key is required. Provide via --api-key or VOLC_API_KEY environment variable.")
        sys.exit(1)

    generate_image(
        prompt=args.prompt,
        model=args.model,
        size=args.size,
        api_key=api_key,
        image_input=args.image,
        sequential=args.sequential,
        max_images=args.max_images,
    )


if __name__ == "__main__":
    main()


# ===========================================================================
# diff --git a/skills/seedream/scripts/generate_video.py
#            b/skills/seedream/scripts/generate_video.py
# new file mode 100755 (original blob 13db32b)
# ===========================================================================
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "requests>=2.31.0",
# ]
# ///
#
# Create an asynchronous video-generation task, poll it until it finishes and
# print a single "MEDIA_URL: <url>" line on stdout. Progress goes to stderr.

import argparse
import json
import os
import re
import sys
import time

API_BASE = "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks"

# (connect, read) timeouts in seconds for the short create/poll requests.
VIDEO_REQUEST_TIMEOUT = (10, 60)

# Last-resort pattern for spotting a video link anywhere in a response. The
# extension group is NON-capturing: with a capturing group `re.findall`
# returns only the group ("mp4"), not the URL.
VIDEO_URL_RE = re.compile(r'https?://[^\s"]+\.(?:mp4|mov|avi|webm)[^\s"]*')

# Statuses after which polling can never succeed. SOURCE only handled
# "failed"; "cancelled" and "expired" are added from the Ark task API as the
# reviewer remembers it - TODO confirm against the official status list.
FAILED_STATUSES = frozenset({"failed", "cancelled", "expired"})


def _auth_headers(api_key):
    """Return the JSON + bearer-token headers used by every request."""
    return {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }


def _iter_strings(node):
    """Yield every string value nested anywhere inside a decoded JSON value."""
    if isinstance(node, str):
        yield node
    elif isinstance(node, dict):
        for value in node.values():
            yield from _iter_strings(value)
    elif isinstance(node, list):
        for value in node:
            yield from _iter_strings(value)


def extract_video_url(result):
    """Find the generated video's URL in a decoded task response.

    Looks, in order, at: ``content["video_url"]`` when ``content`` is a dict;
    the first list entry shaped like
    ``{"type": "video_url", "video_url": {"url": ...}}`` or the first entry
    that is a bare ``http...`` string when ``content`` is a list; and finally
    any string value in the response that looks like a link to an
    mp4/mov/avi/webm file.

    Returns:
        The URL, or ``""`` when nothing was found.
    """
    content = result.get("content", {}) if isinstance(result, dict) else None

    if isinstance(content, dict):
        direct = content.get("video_url", "")
        if isinstance(direct, str) and direct:
            return direct
    elif isinstance(content, list):
        for item in content:
            if isinstance(item, dict) and item.get("type") == "video_url":
                holder = item.get("video_url")
                candidate = holder.get("url", "") if isinstance(holder, dict) else ""
                if candidate:
                    return candidate
            elif isinstance(item, str) and item.startswith("http"):
                return item

    # Fallback: scan every string in the response for something that looks
    # like a video file link.
    for text in _iter_strings(result):
        match = VIDEO_URL_RE.search(text)
        if match:
            return match.group(0)
    return ""


def create_video_task(prompt, model, api_key, image_url=None, duration=5, camera_fixed=False, watermark=True):
    """Submit a video-generation task and return its task id.

    Args:
        prompt: Text description of the video.
        model: Model ID.
        api_key: Bearer token for the API.
        image_url: Optional reference image URL for image-to-video.
        duration: Clip length in seconds, appended to the prompt as a flag.
        camera_fixed: Whether to request a fixed camera.
        watermark: Whether to request a watermark.

    Returns:
        The ``id`` field of the API response.

    Exits the process with status 1 when the request fails or the response
    carries no task id.
    """
    # Lazy import: see the note in generate_image().
    import requests

    # Generation options travel as "--flag value" text appended to the prompt.
    options = (
        f"--duration {duration} "
        f"--camerafixed {str(camera_fixed).lower()} "
        f"--watermark {str(watermark).lower()}"
    )
    content = [{"type": "text", "text": f"{prompt} {options}"}]
    if image_url:
        content.append({"type": "image_url", "image_url": {"url": image_url}})

    payload = {"model": model, "content": content}

    try:
        response = requests.post(
            API_BASE,
            headers=_auth_headers(api_key),
            json=payload,
            timeout=VIDEO_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"ERROR: API request failed: {e}")
        if getattr(e, "response", None) is not None:
            print(f"Response body: {e.response.text}")
        sys.exit(1)

    task_id = result.get("id")
    if not task_id:
        print(f"ERROR: No task id in response. Full response: {json.dumps(result)}")
        sys.exit(1)
    return task_id


def poll_video_task(task_id, api_key, poll_interval=5, max_wait=600):
    """Poll a task until it finishes, then print its video URL.

    Args:
        task_id: Id returned by ``create_video_task``.
        api_key: Bearer token for the API.
        poll_interval: Seconds to sleep between polls (values below 1 are
            raised to 1 so the loop can neither spin nor hang).
        max_wait: Wall-clock budget in seconds before giving up.

    Prints ``MEDIA_URL: <url>`` and returns on success. Exits the process
    with status 1 on a request error, a failed task, a finished task without
    a video URL, or a timeout.
    """
    # Lazy import: see the note in generate_image().
    import requests

    headers = _auth_headers(api_key)
    url = f"{API_BASE}/{task_id}"
    poll_interval = max(1, poll_interval)

    # Measure real elapsed time: counting only the sleeps would ignore the
    # time spent inside each HTTP request and overshoot `max_wait`.
    started = time.monotonic()
    while True:
        elapsed = time.monotonic() - started
        if elapsed >= max_wait:
            break

        try:
            response = requests.get(url, headers=headers, timeout=VIDEO_REQUEST_TIMEOUT)
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.RequestException as e:
            print(f"ERROR: Polling failed: {e}")
            if getattr(e, "response", None) is not None:
                print(f"Response body: {e.response.text}")
            sys.exit(1)

        status = result.get("status")

        if status == "succeeded":
            video_url = extract_video_url(result)
            if video_url:
                print(f"MEDIA_URL: {video_url}")
                return
            print(f"ERROR: Task succeeded but no video URL found. Full response: {json.dumps(result)}")
            sys.exit(1)

        if status in FAILED_STATUSES:
            error = result.get("error") or {}
            print(f"ERROR: Task {status}. Code: {error.get('code')}, Message: {error.get('message')}")
            sys.exit(1)

        # Still queued/running: report progress on stderr and wait, without
        # sleeping past the deadline.
        print(
            f"STATUS: {status} (elapsed {int(elapsed)}s, polling every {poll_interval}s...)",
            file=sys.stderr,
        )
        remaining = max_wait - (time.monotonic() - started)
        time.sleep(max(0, min(poll_interval, remaining)))

    print(f"ERROR: Timed out after {max_wait}s waiting for task {task_id}")
    sys.exit(1)


def main():
    """Parse command-line arguments, create a video task and wait for it."""
    parser = argparse.ArgumentParser(description="Generate videos using Volcengine Seedance API.")
    parser.add_argument("--prompt", required=True, help="Text prompt for video generation")
    parser.add_argument("--model", default="doubao-seedance-1-5-pro-251215", help="Model ID")
    parser.add_argument("--api-key", help="Volcengine API Key")
    parser.add_argument("--image", help="Input image URL for image-to-video")
    parser.add_argument("--duration", type=int, default=5, choices=[5, 10], help="Video duration in seconds (5 or 10)")
    parser.add_argument("--camera-fixed", action="store_true", help="Fix camera position")
    parser.add_argument("--no-watermark", action="store_true", help="Disable watermark")
    parser.add_argument("--poll-interval", type=int, default=5, help="Polling interval in seconds")
    parser.add_argument("--max-wait", type=int, default=600, help="Max wait time in seconds")

    args = parser.parse_args()

    if args.poll_interval < 1:
        parser.error("--poll-interval must be at least 1 second")
    if args.max_wait < 1:
        parser.error("--max-wait must be at least 1 second")

    api_key = args.api_key or os.environ.get("VOLC_API_KEY")
    if not api_key:
        print("ERROR: API key is required. Provide via --api-key or VOLC_API_KEY environment variable.")
        sys.exit(1)

    print("STATUS: Creating video task...", file=sys.stderr)
    task_id = create_video_task(
        prompt=args.prompt,
        model=args.model,
        api_key=api_key,
        image_url=args.image,
        duration=args.duration,
        camera_fixed=args.camera_fixed,
        watermark=not args.no_watermark,
    )
    print(f"STATUS: Task created: {task_id}, polling for result...", file=sys.stderr)

    poll_video_task(
        task_id=task_id,
        api_key=api_key,
        poll_interval=args.poll_interval,
        max_wait=args.max_wait,
    )


if __name__ == "__main__":
    main()