# Files
# 631 lines
# 22 KiB
# Python
"""
AI Studio Image — Gerador de Imagens (v2 — Enhanced)
Script principal que conecta com Google AI Studio (Gemini/Imagen)
para gerar imagens humanizadas. Suporta todos os modelos oficiais,
fallback automatico de API keys, e metadados completos.
"""
import argparse
import base64
import json
import re
import sys
import time
from datetime import datetime
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from config import (
MODELS,
DEFAULT_MODEL,
DEFAULT_FORMAT,
DEFAULT_HUMANIZATION,
DEFAULT_MODE,
DEFAULT_RESOLUTION,
DEFAULT_PERSON_GENERATION,
IMAGE_FORMATS,
FORMAT_ALIASES,
OUTPUTS_DIR,
OUTPUT_SETTINGS,
get_api_key,
get_all_api_keys,
safety_check_model,
safety_check_daily_limit,
)
from prompt_engine import humanize_prompt, analyze_prompt, resolve_format
def _check_dependencies():
"""Verifica dependencias necessarias."""
try:
import google.genai # noqa: F401
except ImportError:
print("=" * 60)
print(" DEPENDENCIA FALTANDO: google-genai")
print("=" * 60)
print()
print(" Instale com:")
print(" pip install google-genai Pillow python-dotenv")
print()
print(" Ou use o requirements.txt:")
scripts_dir = Path(__file__).parent
print(f" pip install -r {scripts_dir / 'requirements.txt'}")
print()
sys.exit(1)
def _get_client(api_key: str):
    """Build and return a Google GenAI client bound to *api_key*."""
    from google import genai

    client = genai.Client(api_key=api_key)
    return client
# =============================================================================
# GERACAO VIA IMAGEN (imagen-4, imagen-4-ultra, imagen-4-fast)
# =============================================================================
def generate_with_imagen(
    prompt: str,
    model_id: str,
    aspect_ratio: str,
    num_images: int,
    api_key: str,
    resolution: str = "1K",
    person_generation: str = DEFAULT_PERSON_GENERATION,
) -> list[dict]:
    """Generate images through the Imagen 4 family.

    Returns a list of dicts with ``image_bytes`` (raw bytes) and
    ``mime_type`` keys; empty when the API yields no images.
    """
    from google.genai import types

    client = _get_client(api_key)

    params = {
        "number_of_images": num_images,
        "aspect_ratio": aspect_ratio,
        "output_mime_type": OUTPUT_SETTINGS["default_mime_type"],
        "person_generation": person_generation,
    }
    # Only Standard/Ultra accept 2K; the "fast" variant stays at 1K.
    if resolution == "2K" and "fast" not in model_id:
        params["image_size"] = resolution

    response = client.models.generate_images(
        model=model_id,
        prompt=prompt,
        config=types.GenerateImagesConfig(**params),
    )

    output: list[dict] = []
    for generated in (response.generated_images or []):
        raw = generated.image.image_bytes
        # The SDK may return base64 text instead of raw bytes; normalize.
        payload = base64.b64decode(raw) if isinstance(raw, str) else raw
        output.append({
            "image_bytes": payload,
            "mime_type": OUTPUT_SETTINGS["default_mime_type"],
        })
    return output
# =============================================================================
# GERACAO VIA GEMINI (gemini-flash-image, gemini-pro-image)
# =============================================================================
def generate_with_gemini(
    prompt: str,
    model_id: str,
    aspect_ratio: str,
    api_key: str,
    resolution: str = "1K",
    reference_images: list[Path] | None = None,
) -> list[dict]:
    """Generate images via Gemini generateContent with the IMAGE modality.

    Reference images (Gemini Pro Image only) are sent before the prompt.
    Returns a list of dicts with ``image_bytes`` and ``mime_type`` keys.
    """
    from google.genai import types
    from PIL import Image

    client = _get_client(api_key)

    # Existing reference images first, then the text prompt.
    contents = [
        Image.open(str(ref))
        for ref in (reference_images or [])
        if Path(ref).exists()
    ]
    contents.append(prompt)

    # Some models (e.g. gemini-2.0-flash-exp) reject ImageConfig entirely;
    # look the capability up in MODELS by id, defaulting to "supported".
    supports_ar = next(
        (
            cfg.get("supports_aspect_ratio", True)
            for cfg in MODELS.values()
            if cfg["id"] == model_id
        ),
        True,
    )

    if supports_ar:
        image_kwargs = {"aspect_ratio": aspect_ratio}
        # Only the Pro tier accepts explicit 2K/4K output sizes.
        if resolution in ("2K", "4K") and "pro" in model_id.lower():
            image_kwargs["image_size"] = resolution
        config = types.GenerateContentConfig(
            response_modalities=["TEXT", "IMAGE"],
            image_config=types.ImageConfig(**image_kwargs),
        )
    else:
        config = types.GenerateContentConfig(
            response_modalities=["TEXT", "IMAGE"],
        )

    response = client.models.generate_content(
        model=model_id,
        contents=contents,
        config=config,
    )

    output: list[dict] = []
    for candidate in (response.candidates or []):
        if not (candidate.content and candidate.content.parts):
            continue
        for part in candidate.content.parts:
            inline = getattr(part, 'inline_data', None)
            if not inline:
                continue
            raw = inline.data
            payload = base64.b64decode(raw) if isinstance(raw, str) else raw
            output.append({
                "image_bytes": payload,
                "mime_type": inline.mime_type or "image/png",
            })
    return output
# =============================================================================
# SALVAR IMAGEM + METADADOS
# =============================================================================
def save_image(
    image_data: dict,
    output_dir: Path,
    mode: str,
    template: str,
    index: int,
    metadata: dict,
) -> Path:
    """Write one generated image (and its metadata sidecar) to disk.

    Args:
        image_data: dict with ``image_bytes`` and optional ``mime_type``.
        output_dir: existing directory to write into.
        mode: generation mode, used in the filename.
        template: template name, sanitized and truncated for the filename.
        index: variation index, disambiguates files from the same batch.
        metadata: full generation metadata, serialized as JSON alongside
            the image when OUTPUT_SETTINGS["save_metadata"] is enabled.

    Returns:
        Path of the image file written.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    mime = image_data.get("mime_type", "image/png")
    ext = "png" if "png" in mime else "jpg"
    # Descriptive, collision-resistant name: mode + template + time + index.
    template_clean = template.replace(" ", "-")[:20]
    filename = f"{mode}_{template_clean}_{timestamp}_{index}.{ext}"
    filepath = output_dir / filename
    filepath.write_bytes(image_data["image_bytes"])
    if OUTPUT_SETTINGS["save_metadata"]:
        # BUG FIX: the sidecar previously used a literal "(unknown)" name,
        # so every image clobbered the same metadata file; pair the sidecar
        # with its image file instead.
        meta_path = output_dir / f"{filename}.meta.json"
        meta_path.write_text(
            json.dumps(metadata, indent=2, ensure_ascii=False, default=str),
            encoding="utf-8",
        )
    return filepath
# =============================================================================
# FUNCAO PRINCIPAL — COM FALLBACK DE API KEYS
# =============================================================================
def generate(
    prompt: str,
    mode: str = DEFAULT_MODE,
    format_name: str = DEFAULT_FORMAT,
    humanization: str = DEFAULT_HUMANIZATION,
    lighting: str | None = None,
    model_name: str = DEFAULT_MODEL,
    num_images: int = 1,
    template: str = "custom",
    template_context: str | None = None,
    output_dir: Path | None = None,
    skip_humanization: bool = False,
    resolution: str = DEFAULT_RESOLUTION,
    person_generation: str = DEFAULT_PERSON_GENERATION,
    reference_images: list[Path] | None = None,
    shot_type: str | None = None,
    force_paid: bool = False,
) -> list[Path]:
    """Main image-generation entry point.

    Flow:
        1. Runs safety checks (model allow-list, daily limit), then
           collects API keys and tries each with automatic fallback.
        2. Humanizes the prompt (unless ``skip_humanization``).
        3. Calls the matching backend (Imagen or Gemini).
        4. Saves images plus full metadata.
        5. Returns the paths of the files written.

    Returns an empty list when no image could be generated.
    Raises SystemExit when a safety check rejects the request.
    """
    # 0. Safety controller — verify the model is allowed and quota remains.
    allowed, msg = safety_check_model(model_name, force=force_paid)
    if not allowed:
        raise SystemExit(f"[SAFETY] {msg}")
    print(f"[SAFETY] {msg}")
    allowed, msg = safety_check_daily_limit(num_images)
    if not allowed:
        raise SystemExit(f"[SAFETY] {msg}")
    print(f"[SAFETY] {msg}")
    # 1. Collect API keys (env var or .env); abort with guidance if none.
    api_keys = get_all_api_keys()
    if not api_keys:
        print("=" * 60)
        print(" ERRO: Nenhuma GEMINI_API_KEY encontrada!")
        print("=" * 60)
        print()
        print(" Configure de uma dessas formas:")
        print(" 1. Variavel de ambiente: set GEMINI_API_KEY=sua-key")
        print(" 2. Arquivo .env em: C:\\Users\\renat\\skills\\ai-studio-image\\")
        print()
        print(" Obtenha sua key em: https://aistudio.google.com/apikey")
        sys.exit(1)
    # 2. Resolve the format name (aliases such as "4:5" are accepted);
    #    fall back to the default for unknown names.
    format_name = resolve_format(format_name)
    if format_name not in IMAGE_FORMATS:
        format_name = DEFAULT_FORMAT
    # 3. Humanize the prompt, unless the caller wants it sent verbatim.
    if skip_humanization:
        final_prompt = prompt
    else:
        final_prompt = humanize_prompt(
            user_prompt=prompt,
            mode=mode,
            humanization=humanization,
            lighting=lighting,
            template_context=template_context,
            shot_type=shot_type,
            resolution=resolution,
        )
    # 4. Resolve model/format configuration and the output directory.
    model_config = MODELS.get(model_name, MODELS[DEFAULT_MODEL])
    format_config = IMAGE_FORMATS[format_name]
    aspect_ratio = format_config["aspect_ratio"]
    if output_dir is None:
        output_dir = OUTPUTS_DIR
    output_dir.mkdir(parents=True, exist_ok=True)
    # Clamp the request to the model's per-call image limit.
    num_images = min(num_images, model_config["max_images"])
    print("=" * 60)
    print(" AI STUDIO IMAGE — Gerando Imagem Humanizada")
    print("=" * 60)
    print(f" Modelo: {model_config['id']}")
    print(f" Tipo: {model_config['type']}")
    print(f" Modo: {mode}")
    print(f" Formato: {format_name} ({aspect_ratio})")
    print(f" Humanizacao: {humanization}")
    print(f" Resolucao: {resolution}")
    print(f" Imagens: {num_images}")
    if lighting:
        print(f" Iluminacao: {lighting}")
    if reference_images:
        print(f" Referencias: {len(reference_images)} imagem(ns)")
    print(f" Output: {output_dir}")
    print("=" * 60)
    print()
    # 5. Generate, falling back across API keys; on rate limits, retry the
    #    whole key list up to max_retries times with a capped backoff.
    images = []
    used_key_index = 0
    start_time = time.time()
    max_retries = 3
    retry_delay = 15  # seconds
    for attempt in range(max_retries):
        for i, api_key in enumerate(api_keys):
            try:
                if model_config["type"] == "imagen":
                    images = generate_with_imagen(
                        prompt=final_prompt,
                        model_id=model_config["id"],
                        aspect_ratio=aspect_ratio,
                        num_images=num_images,
                        api_key=api_key,
                        resolution=resolution,
                        person_generation=person_generation,
                    )
                else:
                    images = generate_with_gemini(
                        prompt=final_prompt,
                        model_id=model_config["id"],
                        aspect_ratio=aspect_ratio,
                        api_key=api_key,
                        resolution=resolution,
                        reference_images=reference_images,
                    )
                if images:
                    used_key_index = i
                    break
            except Exception as e:
                error_msg = str(e)
                is_rate_limit = "429" in error_msg or "RESOURCE_EXHAUSTED" in error_msg
                is_last_key = i >= len(api_keys) - 1
                if not is_last_key:
                    # More keys to try — move on to the next backup key.
                    print(f" Key {i+1} falhou ({error_msg[:60]}...), tentando backup...")
                    continue
                elif is_rate_limit and attempt < max_retries - 1:
                    # Honor the server-suggested retry delay when present.
                    delay_match = re.search(r'retryDelay.*?(\d+)', error_msg)
                    wait_time = int(delay_match.group(1)) if delay_match else retry_delay
                    wait_time = min(wait_time + 5, 60)  # cap at 60s
                    print(f" Rate limit atingido. Aguardando {wait_time}s (tentativa {attempt+1}/{max_retries})...")
                    time.sleep(wait_time)
                    break  # Break inner loop to retry all keys
                else:
                    # Last key, out of retries (or non-retryable error): give up.
                    print(f"\n ERRO: Todas as tentativas falharam.")
                    print(f" Ultimo erro: {error_msg[:200]}")
                    print()
                    if is_rate_limit:
                        print(" Rate limit esgotado. Sugestoes:")
                        print(" - Aguarde alguns minutos e tente novamente")
                        print(" - Habilite billing no Google Cloud para limites maiores")
                        print(" - Use um modelo diferente (--model imagen-4-fast)")
                    else:
                        print(" Dicas:")
                        print(" - Verifique se a API key e valida")
                        print(" - O prompt pode conter conteudo restrito")
                        print(" - Tente simplificar o prompt")
                        print(" - Verifique: https://aistudio.google.com/")
                    return []
        if images:
            break
    elapsed = time.time() - start_time
    if not images:
        print("\n Nenhuma imagem gerada. Verifique o prompt e tente novamente.")
        return []
    # 6. Save images plus a complete metadata record shared by the batch.
    metadata = {
        "original_prompt": prompt,
        "humanized_prompt": final_prompt,
        "mode": mode,
        "format": format_name,
        "aspect_ratio": aspect_ratio,
        "humanization": humanization,
        "lighting": lighting,
        "shot_type": shot_type,
        "model": model_config["id"],
        "model_name": model_name,
        "model_type": model_config["type"],
        "resolution": resolution,
        "person_generation": person_generation,
        "template": template,
        "num_images_requested": num_images,
        "num_images_generated": len(images),
        "generation_time_seconds": round(elapsed, 2),
        "api_key_index": used_key_index,
        "generated_at": datetime.now().isoformat(),
        "reference_images": [str(p) for p in (reference_images or [])],
    }
    saved_paths = []
    for idx, img_data in enumerate(images):
        filepath = save_image(
            image_data=img_data,
            output_dir=output_dir,
            mode=mode,
            template=template,
            index=idx,
            metadata=metadata,
        )
        saved_paths.append(filepath)
        print(f" Salvo: {filepath}")
    print(f"\n {len(saved_paths)} imagem(ns) gerada(s) em {elapsed:.1f}s")
    # Keep the humanized prompt on disk for later reference.
    if OUTPUT_SETTINGS["save_prompt"]:
        prompt_file = output_dir / f"last_prompt_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        content = f"ORIGINAL:\n{prompt}\n\nHUMANIZED:\n{final_prompt}"
        prompt_file.write_text(content, encoding="utf-8")
    return saved_paths
# =============================================================================
# CLI
# =============================================================================
def main():
    """CLI entry point: parse arguments, serve the utility sub-modes
    (--list-models, --list-formats, --analyze), and otherwise dispatch
    to generate()."""
    parser = argparse.ArgumentParser(
        description="Gerar imagens humanizadas via Google AI Studio",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exemplos:
python generate.py --prompt "mulher tomando cafe" --mode influencer
python generate.py --prompt "professor explicando" --mode educacional --format widescreen
python generate.py --template cafe-lifestyle --custom "ruiva, 25 anos"
python generate.py --prompt "produto na mesa" --model imagen-4-ultra --resolution 2K
python generate.py --prompt "paisagem" --format ultrawide --lighting golden-hour
""",
    )
    # Prompt or template selection (one of the two is required later).
    parser.add_argument("--prompt", help="Descricao da imagem desejada")
    parser.add_argument("--template", help="Nome do template pre-configurado")
    parser.add_argument("--custom", help="Personalizacao sobre o template")
    # Main generation settings.
    parser.add_argument("--mode", default=DEFAULT_MODE,
                        choices=["influencer", "educacional"])
    parser.add_argument("--format", default=DEFAULT_FORMAT,
                        help="Formato (square, portrait, landscape, stories, widescreen, ultrawide, "
                        "ou aspect ratio como 4:5, 16:9, etc)")
    parser.add_argument("--humanization", default=DEFAULT_HUMANIZATION,
                        choices=["ultra", "natural", "polished", "editorial"])
    parser.add_argument("--lighting",
                        choices=["morning", "golden-hour", "midday", "overcast",
                                 "night", "indoor", "blue-hour", "shade"])
    parser.add_argument("--shot-type",
                        help="Tipo de enquadramento (close-up, medium, wide, etc)")
    # Model and quality.
    parser.add_argument("--model", default=DEFAULT_MODEL,
                        choices=list(MODELS.keys()),
                        help=f"Modelo (default: {DEFAULT_MODEL})")
    parser.add_argument("--resolution", default=DEFAULT_RESOLUTION,
                        choices=["1K", "2K", "4K"])
    parser.add_argument("--variations", type=int, default=1,
                        help="Numero de variacoes (1-4)")
    # Advanced options.
    parser.add_argument("--reference-images", nargs="+", type=Path,
                        help="Imagens de referencia (apenas Gemini Pro Image)")
    parser.add_argument("--person-generation", default=DEFAULT_PERSON_GENERATION,
                        choices=["dont_allow", "allow_adult", "allow_all"])
    parser.add_argument("--skip-humanization", action="store_true",
                        help="Enviar prompt diretamente sem humanizacao")
    parser.add_argument("--force-paid", action="store_true",
                        help="Permite usar modelos com custo (imagen-4, etc). USE COM CUIDADO.")
    # Output location.
    parser.add_argument("--output", type=Path, help="Diretorio de saida customizado")
    # Utility modes (each returns without generating).
    parser.add_argument("--analyze", action="store_true",
                        help="Apenas analisa o prompt e sugere configuracoes")
    parser.add_argument("--list-models", action="store_true",
                        help="Lista todos os modelos disponiveis")
    parser.add_argument("--list-formats", action="store_true",
                        help="Lista todos os formatos disponiveis")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()
    # List available models and exit.
    if args.list_models:
        print("\nModelos disponiveis:\n")
        for name, cfg in MODELS.items():
            print(f" {name:25s} {cfg['description']}")
            print(f" {'':25s} ID: {cfg['id']}")
            print(f" {'':25s} Max imagens: {cfg['max_images']} | "
                  f"Max res: {cfg.get('max_resolution', 'N/A')}")
            print()
        return
    # List formats and accepted aliases, then exit.
    if args.list_formats:
        print("\nFormatos disponiveis:\n")
        for name, cfg in IMAGE_FORMATS.items():
            print(f" {name:20s} {cfg['aspect_ratio']:8s} {cfg['description']}")
        print("\nAliases aceitos:\n")
        for alias, target in sorted(FORMAT_ALIASES.items()):
            if alias != target:
                print(f" {alias:25s} -> {target}")
        return
    # Analysis-only mode: print suggested settings without generating.
    if args.analyze:
        if not args.prompt:
            print("ERRO: --prompt obrigatorio com --analyze")
            sys.exit(1)
        analysis = analyze_prompt(args.prompt)
        if args.json:
            print(json.dumps(analysis, indent=2, ensure_ascii=False))
        else:
            print("\nAnalise do prompt:\n")
            for k, v in analysis.items():
                if k != "analysis":
                    print(f" {k:20s} {v or 'auto'}")
        return
    # Resolve the prompt: a template (with optional customization) or --prompt.
    template_context = None
    if args.template:
        from templates import get_template
        tmpl = get_template(args.template)
        if not tmpl:
            print(f"ERRO: Template '{args.template}' nao encontrado")
            print("Use: python templates.py --list")
            sys.exit(1)
        prompt = tmpl["prompt"]
        if args.custom:
            prompt = f"{prompt}. Additional specific details: {args.custom}"
        template_context = tmpl.get("context", "")
        # Template suggestions only override values the user left at defaults.
        if args.mode == DEFAULT_MODE and "mode" in tmpl:
            args.mode = tmpl["mode"]
        if args.format == DEFAULT_FORMAT and "suggested_format" in tmpl:
            args.format = tmpl["suggested_format"]
        if not args.lighting and "suggested_lighting" in tmpl:
            args.lighting = tmpl["suggested_lighting"]
        if args.humanization == DEFAULT_HUMANIZATION and "suggested_humanization" in tmpl:
            args.humanization = tmpl["suggested_humanization"]
    elif args.prompt:
        prompt = args.prompt
    else:
        print("ERRO: Forneca --prompt ou --template")
        print("Use --help para ver todas as opcoes")
        sys.exit(1)
    _check_dependencies()
    # Generate and collect the written paths.
    paths = generate(
        prompt=prompt,
        mode=args.mode,
        format_name=args.format,
        humanization=args.humanization,
        lighting=args.lighting,
        model_name=args.model,
        num_images=args.variations,
        template=args.template or "custom",
        template_context=template_context,
        output_dir=args.output,
        skip_humanization=args.skip_humanization,
        resolution=args.resolution,
        person_generation=args.person_generation,
        reference_images=args.reference_images,
        shot_type=args.shot_type,
        force_paid=args.force_paid,
    )
    # Machine-readable summary for callers that requested --json.
    if args.json and paths:
        result = {
            "generated": [str(p) for p in paths],
            "count": len(paths),
            "output_dir": str(paths[0].parent) if paths else None,
        }
        print(json.dumps(result, indent=2, ensure_ascii=False))
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()