qwen_agent/skills/seedream/scripts/generate_video.py
2026-03-26 16:00:07 +08:00

162 lines
5.7 KiB
Python
Executable File

#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "requests>=2.31.0",
# ]
# ///
import argparse
import json
import os
import sys
import time
import requests
# Endpoint for content-generation tasks. A task is created by POSTing to this
# URL and its state is read back with a GET on "<API_BASE>/<task_id>".
API_BASE = "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks"
def create_video_task(prompt, model, api_key, image_url=None, duration=5, camera_fixed=False, watermark=True, timeout=60):
    """Submit a video generation task and return its task id.

    The generation options are appended to the prompt text as
    ``--duration``, ``--camerafixed`` and ``--watermark`` flags. When
    ``image_url`` is given it is sent as an additional ``image_url`` content
    item.

    Args:
        prompt: Text prompt describing the video.
        model: Model id placed in the request payload.
        api_key: Bearer token used for the ``Authorization`` header.
        image_url: Optional input image URL.
        duration: Value for the ``--duration`` flag.
        camera_fixed: Value for the ``--camerafixed`` flag (sent lowercase).
        watermark: Value for the ``--watermark`` flag (sent lowercase).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``id`` field of the JSON response.

    Exits the process with status 1 (after printing an ``ERROR:`` line) when
    the request fails or the response carries no task id.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    flags = (
        f"--duration {duration} "
        f"--camerafixed {str(camera_fixed).lower()} "
        f"--watermark {str(watermark).lower()}"
    )
    content = [{"type": "text", "text": f"{prompt} {flags}"}]
    if image_url:
        content.append({"type": "image_url", "image_url": {"url": image_url}})
    payload = {"model": model, "content": content}

    try:
        # Without an explicit timeout requests waits indefinitely on a
        # stalled connection, which would hang the whole script.
        response = requests.post(API_BASE, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"ERROR: API request failed: {e}")
        failed_response = getattr(e, "response", None)
        if failed_response is not None:
            print(f"Response body: {failed_response.text}")
        sys.exit(1)

    # Guard against a JSON body that is not an object (e.g. a list or string),
    # which would otherwise crash on .get().
    task_id = result.get("id") if isinstance(result, dict) else None
    if not task_id:
        print(f"ERROR: No task id in response. Full response: {json.dumps(result)}")
        sys.exit(1)
    return task_id
def _extract_video_url(result):
    """Return the video URL found in a succeeded task response, or "" if none."""
    import re  # local import: the module's top-level imports do not include re

    content = result.get("content", {})
    candidate = ""
    # "content" can be a dict with a "video_url" key or a list of items.
    if isinstance(content, dict):
        candidate = content.get("video_url", "")
    elif isinstance(content, list):
        for item in content:
            if isinstance(item, dict) and item.get("type") == "video_url":
                candidate = item.get("video_url", "")
                break
            if isinstance(item, str) and item.startswith("http"):
                candidate = item
                break
    # The URL may be given directly or wrapped as {"url": ...}.
    if isinstance(candidate, dict):
        candidate = candidate.get("url", "")
    if isinstance(candidate, str) and candidate:
        return candidate

    # Fallback: look for any video-file URL anywhere in the response. The
    # extension group must be non-capturing so the whole URL is matched, not
    # just the extension.
    match = re.search(r'https?://[^\s"]+\.(?:mp4|mov|avi|webm)[^\s"]*', json.dumps(result))
    return match.group(0) if match else ""


def poll_video_task(task_id, api_key, poll_interval=5, max_wait=600, request_timeout=30):
    """Poll a generation task until it finishes and print the resulting URL.

    On success a ``MEDIA_URL: <url>`` line is printed to stdout and the
    function returns. Progress lines (``STATUS: ...``) go to stderr.

    Args:
        task_id: Id returned when the task was created.
        api_key: Bearer token used for the ``Authorization`` header.
        poll_interval: Seconds to sleep between polls (values below 1 are
            treated as 1 so the loop never spins without pausing).
        max_wait: Maximum wall-clock seconds to keep polling.
        request_timeout: Seconds to wait for each HTTP request.

    Exits the process with status 1 (after printing an ``ERROR:`` line) when
    a request fails, the task reports ``failed``, the task succeeds without a
    discoverable video URL, or ``max_wait`` elapses.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    url = f"{API_BASE}/{task_id}"
    sleep_for = max(poll_interval, 1)
    # Measure real elapsed time so slow requests count against max_wait too.
    started = time.monotonic()

    while True:
        elapsed = time.monotonic() - started
        if elapsed >= max_wait:
            break

        try:
            response = requests.get(url, headers=headers, timeout=request_timeout)
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.RequestException as e:
            print(f"ERROR: Polling failed: {e}")
            failed_response = getattr(e, "response", None)
            if failed_response is not None:
                print(f"Response body: {failed_response.text}")
            sys.exit(1)

        if not isinstance(result, dict):
            print(f"ERROR: Unexpected polling response: {json.dumps(result)}")
            sys.exit(1)

        status = result.get("status")
        if status == "succeeded":
            video_url = _extract_video_url(result)
            if video_url:
                print(f"MEDIA_URL: {video_url}")
                return
            print(f"ERROR: Task succeeded but no video URL found. Full response: {json.dumps(result)}")
            # Exit non-zero like every other error path so callers checking
            # the exit status do not mistake this for success.
            sys.exit(1)

        if status == "failed":
            # "error" may be present but null; fall back to an empty dict.
            error = result.get("error") or {}
            print(f"ERROR: Task failed. Code: {error.get('code')}, Message: {error.get('message')}")
            sys.exit(1)

        # Still running, wait and retry.
        print(f"STATUS: {status} (elapsed {int(elapsed)}s, polling every {sleep_for}s...)", file=sys.stderr)
        time.sleep(sleep_for)

    print(f"ERROR: Timed out after {max_wait}s waiting for task {task_id}")
    sys.exit(1)
def main():
    """Command-line entry point: parse options, create a task, then poll it.

    The API key is taken from ``--api-key`` or, failing that, from the
    ``VOLC_API_KEY`` environment variable; the process exits with status 1
    when neither is set.
    """
    cli = argparse.ArgumentParser(description="Generate videos using Volcengine Seedance API.")
    option_specs = (
        ("--prompt", {"required": True, "help": "Text prompt for video generation"}),
        ("--model", {"default": "doubao-seedance-1-5-pro-251215", "help": "Model ID"}),
        ("--api-key", {"help": "Volcengine API Key"}),
        ("--image", {"help": "Input image URL for image-to-video"}),
        ("--duration", {"type": int, "default": 5, "choices": [5, 10], "help": "Video duration in seconds (5 or 10)"}),
        ("--camera-fixed", {"action": "store_true", "help": "Fix camera position"}),
        ("--no-watermark", {"action": "store_true", "help": "Disable watermark"}),
        ("--poll-interval", {"type": int, "default": 5, "help": "Polling interval in seconds"}),
        ("--max-wait", {"type": int, "default": 600, "help": "Max wait time in seconds"}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    # The command-line value wins; the environment variable is the fallback.
    key = opts.api_key if opts.api_key else os.environ.get("VOLC_API_KEY")
    if not key:
        print("ERROR: API key is required. Provide via --api-key or VOLC_API_KEY environment variable.")
        sys.exit(1)

    print("STATUS: Creating video task...", file=sys.stderr)
    creation_kwargs = {
        "prompt": opts.prompt,
        "model": opts.model,
        "api_key": key,
        "image_url": opts.image,
        "duration": opts.duration,
        "camera_fixed": opts.camera_fixed,
        "watermark": not opts.no_watermark,
    }
    task_id = create_video_task(**creation_kwargs)

    print(f"STATUS: Task created: {task_id}, polling for result...", file=sys.stderr)
    poll_video_task(
        task_id=task_id,
        api_key=key,
        poll_interval=opts.poll_interval,
        max_wait=opts.max_wait,
    )
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()