# qwen_agent/skills/seedream/scripts/generate_video.py

#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "requests>=2.31.0",
# ]
# ///

import argparse
import json
import os
import sys
import time
import requests

# Generation-task endpoint: create_video_task() POSTs here to create a task,
# and poll_video_task() appends "/<task_id>" to query that task's status.
API_BASE = "https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks"


def create_video_task(prompt, model, api_key, image_url=None, duration=5, camera_fixed=False, watermark=True,
                      request_timeout=60):
    """Create a video generation task and return its task id.

    The generation options are appended to the prompt text as ``--flag value``
    pairs; an optional reference image is sent as a second content item.

    Args:
        prompt: Text prompt describing the video.
        model: Model id sent in the request payload.
        api_key: Bearer token used for the ``Authorization`` header.
        image_url: Optional image URL; when truthy it is added to the content.
        duration: Value emitted after ``--duration`` in the prompt text.
        camera_fixed: Emitted lower-cased after ``--camerafixed``.
        watermark: Emitted lower-cased after ``--watermark``.
        request_timeout: Seconds passed to ``requests`` as the HTTP timeout so
            a stalled connection cannot hang the script indefinitely.

    Returns:
        The ``id`` field of the JSON response.

    Exits:
        Calls ``sys.exit(1)`` after printing an ``ERROR:`` line when the HTTP
        request fails or the response carries no task id.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    text = (
        f"{prompt}  --duration {duration} --camerafixed {str(camera_fixed).lower()}"
        f" --watermark {str(watermark).lower()}"
    )
    content = [{"type": "text", "text": text}]

    if image_url:
        content.append({
            "type": "image_url",
            "image_url": {"url": image_url}
        })

    payload = {
        "model": model,
        "content": content
    }

    # Only the network round-trip and JSON decoding live in the try body; the
    # response validation below cannot raise RequestException.
    try:
        response = requests.post(API_BASE, headers=headers, json=payload, timeout=request_timeout)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"ERROR: API request failed: {e}")
        if getattr(e, "response", None) is not None:
            print(f"Response body: {e.response.text}")
        sys.exit(1)

    # A non-object JSON reply (list, string, null) has no "id" to read.
    task_id = result.get("id") if isinstance(result, dict) else None
    if not task_id:
        print(f"ERROR: No task id in response. Full response: {json.dumps(result)}")
        sys.exit(1)
    return task_id


def _extract_video_url(result):
    """Return the video URL contained in a succeeded task response, or ""."""
    import re

    def as_url(value):
        # The URL may be given directly as a string or wrapped as {"url": ...}.
        if isinstance(value, str):
            return value
        if isinstance(value, dict) and isinstance(value.get("url"), str):
            return value["url"]
        return ""

    content = result.get("content")
    candidates = []
    if isinstance(content, dict):
        candidates.append(content.get("video_url"))
    elif isinstance(content, list):
        for item in content:
            if isinstance(item, dict) and item.get("type") == "video_url":
                candidates.append(item.get("video_url"))
            elif isinstance(item, str) and item.startswith("http"):
                candidates.append(item)

    for candidate in candidates:
        url = as_url(candidate)
        if url:
            return url

    # Fallback: scan the whole response for anything that looks like a video
    # link. The extension group is non-capturing so the match is the full URL
    # (a capturing group would make findall() return only the extension).
    match = re.search(r'https?://[^\s"]+\.(?:mp4|mov|avi|webm)[^\s"]*', json.dumps(result))
    return match.group(0) if match else ""


def poll_video_task(task_id, api_key, poll_interval=5, max_wait=600, request_timeout=30):
    """Poll a generation task until it finishes, then print its video URL.

    On success a single ``MEDIA_URL: <url>`` line is printed to stdout and the
    function returns. Progress lines go to stderr.

    Args:
        task_id: Id of the task to query (appended to ``API_BASE``).
        api_key: Bearer token used for the ``Authorization`` header.
        poll_interval: Seconds to sleep between polls.
        max_wait: Wall-clock budget in seconds, measured with a monotonic
            clock so time spent inside HTTP requests counts too.
        request_timeout: Seconds passed to ``requests`` as the HTTP timeout.

    Exits:
        Calls ``sys.exit(1)`` after printing an ``ERROR:`` line when a request
        fails, the task ends unsuccessfully, a succeeded task carries no video
        URL, or ``max_wait`` elapses.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    url = f"{API_BASE}/{task_id}"
    started = time.monotonic()
    deadline = started + max_wait

    while time.monotonic() < deadline:
        try:
            response = requests.get(url, headers=headers, timeout=request_timeout)
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.RequestException as e:
            print(f"ERROR: Polling failed: {e}")
            if getattr(e, "response", None) is not None:
                print(f"Response body: {e.response.text}")
            sys.exit(1)

        if not isinstance(result, dict):
            print(f"ERROR: Unexpected polling response: {json.dumps(result)}")
            sys.exit(1)

        status = result.get("status")

        if status == "succeeded":
            video_url = _extract_video_url(result)
            if video_url:
                print(f"MEDIA_URL: {video_url}")
                return
            # This is an error outcome, so it must not exit with status 0.
            print(f"ERROR: Task succeeded but no video URL found. Full response: {json.dumps(result)}")
            sys.exit(1)

        if status == "failed":
            error = result.get("error")
            if isinstance(error, dict):
                code, message = error.get("code"), error.get("message")
            else:
                # "error" may be missing, null, or a bare string.
                code, message = None, error
            print(f"ERROR: Task failed. Code: {code}, Message: {message}")
            sys.exit(1)

        # NOTE(review): "cancelled" and "expired" are assumed to be terminal
        # task states of this API - confirm against the service documentation.
        if status in ("cancelled", "expired"):
            print(f"ERROR: Task ended with status '{status}'.")
            sys.exit(1)

        # Still running: report progress, then sleep without overshooting the deadline.
        elapsed = int(time.monotonic() - started)
        print(f"STATUS: {status} (elapsed {elapsed}s, polling every {poll_interval}s...)", file=sys.stderr)
        remaining = deadline - time.monotonic()
        time.sleep(max(0, min(poll_interval, remaining)))

    print(f"ERROR: Timed out after {max_wait}s waiting for task {task_id}")
    sys.exit(1)


def main():
    """Command-line entry point: create a video task and wait for its result.

    Parses the CLI options, resolves the API key from ``--api-key`` or the
    ``VOLC_API_KEY`` environment variable, creates the generation task and
    then polls it until it completes.

    Exits:
        With status 2 (via ``parser.error``) when ``--poll-interval`` or
        ``--max-wait`` is not a positive integer, and with status 1 when no
        API key is available.
    """
    parser = argparse.ArgumentParser(description="Generate videos using Volcengine Seedance API.")
    parser.add_argument("--prompt", required=True, help="Text prompt for video generation")
    parser.add_argument("--model", default="doubao-seedance-1-5-pro-251215", help="Model ID")
    parser.add_argument("--api-key", help="Volcengine API Key")
    parser.add_argument("--image", help="Input image URL for image-to-video")
    parser.add_argument("--duration", type=int, default=5, choices=[5, 10], help="Video duration in seconds (5 or 10)")
    parser.add_argument("--camera-fixed", action="store_true", help="Fix camera position")
    parser.add_argument("--no-watermark", action="store_true", help="Disable watermark")
    parser.add_argument("--poll-interval", type=int, default=5, help="Polling interval in seconds")
    parser.add_argument("--max-wait", type=int, default=600, help="Max wait time in seconds")

    args = parser.parse_args()

    # Reject timing values that would make the polling loop spin without
    # sleeping, fail inside time.sleep(), or time out before the first poll.
    if args.poll_interval <= 0:
        parser.error("--poll-interval must be a positive integer")
    if args.max_wait <= 0:
        parser.error("--max-wait must be a positive integer")

    api_key = args.api_key or os.environ.get("VOLC_API_KEY")
    if not api_key:
        print("ERROR: API key is required. Provide via --api-key or VOLC_API_KEY environment variable.")
        sys.exit(1)

    print("STATUS: Creating video task...", file=sys.stderr)
    task_id = create_video_task(
        prompt=args.prompt,
        model=args.model,
        api_key=api_key,
        image_url=args.image,
        duration=args.duration,
        camera_fixed=args.camera_fixed,
        watermark=not args.no_watermark
    )
    print(f"STATUS: Task created: {task_id}, polling for result...", file=sys.stderr)

    poll_video_task(
        task_id=task_id,
        api_key=api_key,
        poll_interval=args.poll_interval,
        max_wait=args.max_wait
    )


if __name__ == "__main__":
    main()