# Copyright (c) 2025 Agentspan
# Licensed under the MIT License. See LICENSE file in the project root for details.

"""
Media Generation Agent — generate images, audio, and video using AI models.

Demonstrates Conductor's built-in media generation system tasks
(``GENERATE_IMAGE``, ``GENERATE_AUDIO``, ``GENERATE_VIDEO``) exposed as
native agent tools via ``image_tool()``, ``audio_tool()``, and
``video_tool()``.  These are **server-side** tools — no worker process
is needed.

Architecture:
    orchestrator agent
        tools: generate_image  (DALL-E 3)
               text_to_speech  (OpenAI TTS)
               generate_video  (OpenAI Sora)

Requirements:
    - Conductor server with OpenAI integration configured
    - AGENTSPAN_SERVER_URL=http://localhost:6767/api as environment variable
    - AGENTSPAN_LLM_MODEL=openai/gpt-4o-mini as environment variable
"""

from agentspan.agents import Agent, AgentRuntime, audio_tool, image_tool, video_tool
from settings import settings

# ── Media generation tools (server-side, no worker needed) ────────────

# Image-generation tool, later passed to the agent's ``tools`` list.
# Configured for the "openai" provider with the "dall-e-3" model.  Per the
# module docstring, tools built by ``image_tool()`` run server-side, so no
# worker process is required.
# NOTE(review): ``name``/``description`` are presumably what the LLM sees when
# choosing a tool — confirm against the ``image_tool`` documentation.
gen_image = image_tool(
    name="generate_image",
    description="Generate an image from a text description using DALL-E 3.",
    llm_provider="openai",
    model="dall-e-3",
)

# Text-to-speech tool, later passed to the agent's ``tools`` list.
# Configured for the "openai" provider with the "tts-1" model.  Per the
# module docstring, tools built by ``audio_tool()`` run server-side, so no
# worker process is required.
# NOTE(review): no voice is fixed here; presumably the voice is supplied per
# call by the LLM — confirm against the ``audio_tool`` documentation.
gen_audio = audio_tool(
    name="text_to_speech",
    description="Convert text to natural-sounding speech audio using OpenAI TTS.",
    llm_provider="openai",
    model="tts-1",
)

# Video-generation tool, later passed to the agent's ``tools`` list.
# Configured for the "openai" provider with the "sora-2" model.  Per the
# module docstring, tools built by ``video_tool()`` run server-side, so no
# worker process is required.
gen_video = video_tool(
    name="generate_video",
    description="Generate a short video clip from a text description using OpenAI Sora.",
    llm_provider="openai",
    model="sora-2",
    # presumably the output resolution as WIDTHxHEIGHT — TODO confirm
    size="1280x720",
    # presumably the number of clips generated per request — TODO confirm
    n=1,
)

# ── Orchestrator Agent ────────────────────────────────────────────────

# Single agent that owns all three media tools.  The LLM model comes from
# ``settings.llm_model``; the instructions are assembled one line per entry
# (an empty entry yields a blank line in the final prompt).
media_agent = Agent(
    name="media_generator",
    model=settings.llm_model,
    tools=[gen_image, gen_audio, gen_video],
    instructions="\n".join(
        (
            "You are a creative media generation assistant. You can generate:",
            "",
            "1. **Images** — from text descriptions using DALL-E 3.",
            "2. **Audio** — text-to-speech using OpenAI TTS "
            "(voices: alloy, echo, fable, onyx, nova, shimmer).",
            "3. **Video** — short video clips from text using OpenAI Sora.",
            "",
            "IMPORTANT: Image prompts MUST be under 950 characters.",
            "Call the appropriate tool once and present the result.",
        )
    ),
)


if __name__ == "__main__":
    # Banner: title followed by a 60-character rule on the next line.
    print("Media Generation Agent", "=" * 60, sep="\n")

    # Sample request sent to the agent for this demo run.
    demo_prompt = (
        "Create an image of a serene Japanese garden with a koi pond "
        "at sunset, cherry blossoms falling gently. Use vivid style. "
        "Then use that image to generate a video with audio narration describing it."
    )

    # The runtime is used as a context manager around the single demo run.
    with AgentRuntime() as runtime:
        outcome = runtime.run(media_agent, demo_prompt)
        outcome.print_result()

        # Production pattern (instead of the one-off run above):
        #
        # 1. Deploy once, as part of CI/CD:
        #        runtime.deploy(media_agent)
        #    or, from the CLI:
        #        agentspan deploy --package examples.40_media_generation_agent
        #
        # 2. Serve from a separate long-lived worker process:
        #        runtime.serve(media_agent)