# Copyright (c) 2025 Agentspan
# Licensed under the MIT License. See LICENSE file in the project root for details.

"""Multimodal Agent — analyze images and video with vision-capable models.

Demonstrates multimodal input via the ``media`` parameter on
``runtime.run()``.  Pass image or video URLs alongside your text prompt —
the Conductor server includes them in the ChatMessage ``media`` field,
enabling vision-capable models (GPT-4o, Gemini, Claude) to see them.

Supported media types:
    - Images: JPEG, PNG, GIF, WebP (URL or data URI)
    - Video: MP4, MOV (provider-dependent, e.g. Gemini)
    - Audio: MP3, WAV (provider-dependent)

Requirements:
    - Conductor server with LLM support (OpenAI key configured)
    - AGENTSPAN_SERVER_URL=http://localhost:6767/api set as an environment variable
    - AGENTSPAN_LLM_MODEL=openai/gpt-4o-mini set as an environment variable
"""

from agentspan.agents import Agent, AgentRuntime, tool
from settings import settings

# ── Example 1: Simple image analysis ─────────────────────────────────

# System prompt for the plain (tool-less) vision agent, kept separate so the
# Agent construction below reads as pure configuration.
_VISION_ANALYST_INSTRUCTIONS = (
    "You are a visual analysis expert. Describe images in detail, "
    "noting composition, colors, subjects, and any text visible."
)

# No tools are attached: this agent only answers from the prompt and the
# media passed to ``runtime.run()``.
vision_agent = Agent(
    name="vision_analyst",
    model=settings.llm_model,
    instructions=_VISION_ANALYST_INSTRUCTIONS,
)

# ── Example 2: Image analysis with tools ─────────────────────────────

@tool
def search_similar(description: str) -> str:
    """Search for similar images based on a description."""
    # NOTE(review): the docstring above is left untouched on purpose — the
    # @tool decorator presumably surfaces it to the model as the tool
    # description; confirm before rewording it.
    #
    # Demo stub: nothing is actually searched. The reply is a canned message
    # that echoes the description back with a fixed hit count.
    canned_reply = f"Found 3 similar images matching: '{description}'"
    return canned_reply


@tool
def save_analysis(title: str, analysis: str) -> str:
    """Save an image analysis report."""
    # NOTE(review): the docstring above is left untouched on purpose — the
    # @tool decorator presumably surfaces it to the model as the tool
    # description; confirm before rewording it.
    #
    # Demo stub: nothing is persisted. Returns a confirmation string holding
    # the title and a preview of the analysis.
    #
    # title:    label echoed back inside single quotes.
    # analysis: full report text; only the first 100 characters are shown.
    preview_limit = 100
    if len(analysis) > preview_limit:
        # Mark the cut so the reader knows the preview is incomplete.
        preview = f"{analysis[:preview_limit]}..."
    else:
        # Short reports are shown in full; appending an ellipsis here would
        # wrongly suggest that text had been dropped.
        preview = analysis
    return f"Saved analysis '{title}': {preview}"


# System prompt for the tool-using agent. It explicitly tells the model to
# finish by saving, which is what drives the ``save_analysis`` tool call.
_VISION_RESEARCHER_INSTRUCTIONS = (
    "You are a visual research assistant. Analyze images, search for "
    "similar ones, and save your findings. Always save your analysis."
)

# Same model as the other examples, but with the two demo tools attached.
vision_with_tools = Agent(
    name="vision_researcher",
    model=settings.llm_model,
    instructions=_VISION_RESEARCHER_INSTRUCTIONS,
    tools=[search_similar, save_analysis],
)

# ── Example 3: Multi-image comparison ────────────────────────────────

# System prompt for the comparison agent; it lists the aspects the model
# should cover when more than one image is supplied.
_IMAGE_COMPARATOR_INSTRUCTIONS = (
    "You are an image comparison specialist. When given multiple images, "
    "compare and contrast them in detail: similarities, differences, "
    "style, composition, and subject matter."
)

# Tool-less agent intended to be run with several entries in ``media``.
comparator = Agent(
    name="image_comparator",
    model=settings.llm_model,
    instructions=_IMAGE_COMPARATOR_INSTRUCTIONS,
)

# ── Example 4: Multi-agent pipeline with vision ──────────────────────
# First agent describes the image, second generates a creative story

# Stage 1: turn the image into a short textual description.
describer = Agent(
    name="describer",
    model=settings.llm_model,
    instructions="Describe the image in 2-3 vivid sentences.",
)

# Stage 2 prompt: written for an agent that is handed a description rather
# than the image itself.
_STORYTELLER_INSTRUCTIONS = (
    "You receive an image description. Write a short creative "
    "story (3-4 sentences) inspired by it."
)

storyteller = Agent(
    name="storyteller",
    model=settings.llm_model,
    instructions=_STORYTELLER_INSTRUCTIONS,
)

# ``>>`` combines the two agents into one runnable pipeline.
# NOTE(review): presumably describer's output becomes storyteller's input,
# as the prompts suggest — the operator's exact semantics are defined in
# agentspan, not in this file.
creative_pipeline = describer >> storyteller

# Sample image URLs passed as ``media`` by the demo runs in the ``__main__``
# block below.
# NOTE(review): these were described as public-domain; the licensing of the
# orkes.io-hosted files cannot be confirmed from this file — verify.
# SAMPLE_IMAGE is used by the single-image, tools, and comparison runs.
SAMPLE_IMAGE = "https://orkes.io/Home-Page-Prompt-to-Workflow-1.png"
# SAMPLE_IMAGE_2 is used by the comparison and creative-pipeline runs.
SAMPLE_IMAGE_2 = "https://orkes.io/icons/hero-section-workflow_updated.png"


if __name__ == "__main__":
    # One row per demo, executed in order: (banner, agent, prompt, media).
    # Every banner except the first starts with a newline so consecutive
    # results are separated by a blank line.
    demo_runs = (
        # 1. Single image analysis
        (
            "=== Single Image Analysis ===",
            vision_agent,
            "What do you see in this image? Describe it in detail.",
            [SAMPLE_IMAGE],
        ),
        # 2. Image analysis with tools
        (
            "\n=== Image Analysis with Tools ===",
            vision_with_tools,
            "Analyze this image, search for similar ones, and save your findings.",
            [SAMPLE_IMAGE],
        ),
        # 3. Compare multiple images
        (
            "\n=== Multi-Image Comparison ===",
            comparator,
            "Compare these two images. What are the key differences?",
            [SAMPLE_IMAGE, SAMPLE_IMAGE_2],
        ),
        # 4. Creative pipeline from image
        (
            "\n=== Creative Pipeline (describe → story) ===",
            creative_pipeline,
            "Create a story inspired by this image.",
            [SAMPLE_IMAGE_2],
        ),
    )

    # A single runtime is shared by all demos and closed when the block exits.
    with AgentRuntime() as runtime:
        for banner, agent, prompt, media_urls in demo_runs:
            print(banner)
            result = runtime.run(agent, prompt, media=media_urls)
            result.print_result()

        # Production pattern:
        # 1. Deploy once during CI/CD:
        # runtime.deploy(vision_agent)
        # CLI alternative:
        # agentspan deploy --package examples.30_multimodal_agent
        #
        # 2. In a separate long-lived worker process:
        # runtime.serve(vision_agent)