# Files
# 631 lines
# 22 KiB
# Python
"""
AI Studio Image — Gerador de Imagens (v2 — Enhanced)
Script principal que conecta com Google AI Studio (Gemini/Imagen)
para gerar imagens humanizadas. Suporta todos os modelos oficiais,
fallback automatico de API keys, e metadados completos.
"""
import argparse
import base64
import json
import re
import sys
import time
from datetime import datetime
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from config import (
MODELS,
DEFAULT_MODEL,
DEFAULT_FORMAT,
DEFAULT_HUMANIZATION,
DEFAULT_MODE,
DEFAULT_RESOLUTION,
DEFAULT_PERSON_GENERATION,
IMAGE_FORMATS,
FORMAT_ALIASES,
OUTPUTS_DIR,
OUTPUT_SETTINGS,
get_api_key,
get_all_api_keys,
safety_check_model,
safety_check_daily_limit,
)
from prompt_engine import humanize_prompt, analyze_prompt, resolve_format
def _check_dependencies():
"""Verifica dependencias necessarias."""
try:
import google.genai # noqa: F401
except ImportError:
print("=" * 60)
print(" DEPENDENCIA FALTANDO: google-genai")
print("=" * 60)
print()
print(" Instale com:")
print(" pip install google-genai Pillow python-dotenv")
print()
print(" Ou use o requirements.txt:")
scripts_dir = Path(__file__).parent
print(f" pip install -r {scripts_dir / 'requirements.txt'}")
print()
sys.exit(1)
def _get_client(api_key: str):
    """Build and return a Google GenAI client bound to *api_key*."""
    from google import genai

    client = genai.Client(api_key=api_key)
    return client
# =============================================================================
# GERACAO VIA IMAGEN (imagen-4, imagen-4-ultra, imagen-4-fast)
# =============================================================================
def generate_with_imagen(
    prompt: str,
    model_id: str,
    aspect_ratio: str,
    num_images: int,
    api_key: str,
    resolution: str = "1K",
    person_generation: str = DEFAULT_PERSON_GENERATION,
) -> list[dict]:
    """Generate images through the Imagen 4 family.

    Returns a list of dicts with ``image_bytes`` (raw bytes) and
    ``mime_type`` keys; empty when the API yields no images.
    """
    from google.genai import types

    client = _get_client(api_key)

    params = {
        "number_of_images": num_images,
        "aspect_ratio": aspect_ratio,
        "output_mime_type": OUTPUT_SETTINGS["default_mime_type"],
        "person_generation": person_generation,
    }
    # Only Standard/Ultra accept 2K; the "fast" variant stays at 1K.
    if resolution == "2K" and "fast" not in model_id:
        params["image_size"] = resolution

    response = client.models.generate_images(
        model=model_id,
        prompt=prompt,
        config=types.GenerateImagesConfig(**params),
    )

    output: list[dict] = []
    for generated in (response.generated_images or []):
        raw = generated.image.image_bytes
        # The SDK may return base64 text instead of raw bytes; normalize.
        payload = base64.b64decode(raw) if isinstance(raw, str) else raw
        output.append({
            "image_bytes": payload,
            "mime_type": OUTPUT_SETTINGS["default_mime_type"],
        })
    return output
# =============================================================================
# GERACAO VIA GEMINI (gemini-flash-image, gemini-pro-image)
# =============================================================================
def generate_with_gemini(
    prompt: str,
    model_id: str,
    aspect_ratio: str,
    api_key: str,
    resolution: str = "1K",
    reference_images: list[Path] | None = None,
) -> list[dict]:
    """Generate images via Gemini generateContent with the IMAGE modality.

    Reference images (Gemini Pro Image only) are sent before the prompt.
    Returns a list of dicts with ``image_bytes`` and ``mime_type`` keys.
    """
    from google.genai import types
    from PIL import Image

    client = _get_client(api_key)

    # Existing reference images first, then the text prompt.
    contents = [
        Image.open(str(ref))
        for ref in (reference_images or [])
        if Path(ref).exists()
    ]
    contents.append(prompt)

    # Some models (e.g. gemini-2.0-flash-exp) reject ImageConfig entirely;
    # look the capability up in MODELS by id, defaulting to "supported".
    supports_ar = next(
        (
            cfg.get("supports_aspect_ratio", True)
            for cfg in MODELS.values()
            if cfg["id"] == model_id
        ),
        True,
    )

    if supports_ar:
        image_kwargs = {"aspect_ratio": aspect_ratio}
        # Only the Pro tier accepts explicit 2K/4K output sizes.
        if resolution in ("2K", "4K") and "pro" in model_id.lower():
            image_kwargs["image_size"] = resolution
        config = types.GenerateContentConfig(
            response_modalities=["TEXT", "IMAGE"],
            image_config=types.ImageConfig(**image_kwargs),
        )
    else:
        config = types.GenerateContentConfig(
            response_modalities=["TEXT", "IMAGE"],
        )

    response = client.models.generate_content(
        model=model_id,
        contents=contents,
        config=config,
    )

    output: list[dict] = []
    for candidate in (response.candidates or []):
        if not (candidate.content and candidate.content.parts):
            continue
        for part in candidate.content.parts:
            inline = getattr(part, 'inline_data', None)
            if not inline:
                continue
            raw = inline.data
            payload = base64.b64decode(raw) if isinstance(raw, str) else raw
            output.append({
                "image_bytes": payload,
                "mime_type": inline.mime_type or "image/png",
            })
    return output
# =============================================================================
# SALVAR IMAGEM + METADADOS
# =============================================================================
def save_image(
    image_data: dict,
    output_dir: Path,
    mode: str,
    template: str,
    index: int,
    metadata: dict,
) -> Path:
    """Write one generated image (and its metadata sidecar) to disk.

    Args:
        image_data: dict with ``image_bytes`` and optional ``mime_type``.
        output_dir: existing directory to write into.
        mode: generation mode, used in the filename.
        template: template name, sanitized and truncated for the filename.
        index: variation index, disambiguates files from the same batch.
        metadata: full generation metadata, serialized as JSON alongside
            the image when OUTPUT_SETTINGS["save_metadata"] is enabled.

    Returns:
        Path of the image file written.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    mime = image_data.get("mime_type", "image/png")
    ext = "png" if "png" in mime else "jpg"
    # Descriptive, collision-resistant name: mode + template + time + index.
    template_clean = template.replace(" ", "-")[:20]
    filename = f"{mode}_{template_clean}_{timestamp}_{index}.{ext}"
    filepath = output_dir / filename
    filepath.write_bytes(image_data["image_bytes"])
    if OUTPUT_SETTINGS["save_metadata"]:
        # BUG FIX: the sidecar previously used a literal "(unknown)" name,
        # so every image clobbered the same metadata file; pair the sidecar
        # with its image file instead.
        meta_path = output_dir / f"{filename}.meta.json"
        meta_path.write_text(
            json.dumps(metadata, indent=2, ensure_ascii=False, default=str),
            encoding="utf-8",
        )
    return filepath
# =============================================================================
# FUNCAO PRINCIPAL — COM FALLBACK DE API KEYS
# =============================================================================
def generate(
    prompt: str,
    mode: str = DEFAULT_MODE,
    format_name: str = DEFAULT_FORMAT,
    humanization: str = DEFAULT_HUMANIZATION,
    lighting: str | None = None,
    model_name: str = DEFAULT_MODEL,
    num_images: int = 1,
    template: str = "custom",
    template_context: str | None = None,
    output_dir: Path | None = None,
    skip_humanization: bool = False,
    resolution: str = DEFAULT_RESOLUTION,
    person_generation: str = DEFAULT_PERSON_GENERATION,
    reference_images: list[Path] | None = None,
    shot_type: str | None = None,
    force_paid: bool = False,
) -> list[Path]:
    """Main image-generation entry point.

    Flow:
        1. Runs safety checks (model allow-list, daily limit), then
           collects API keys and tries each with automatic fallback.
        2. Humanizes the prompt (unless ``skip_humanization``).
        3. Calls the matching backend (Imagen or Gemini).
        4. Saves images plus full metadata.
        5. Returns the paths of the files written.

    Returns an empty list when no image could be generated.
    Raises SystemExit when a safety check rejects the request.
    """
    # 0. Safety controller — verify the model is allowed and quota remains.
    allowed, msg = safety_check_model(model_name, force=force_paid)
    if not allowed:
        raise SystemExit(f"[SAFETY] {msg}")
    print(f"[SAFETY] {msg}")
    allowed, msg = safety_check_daily_limit(num_images)
    if not allowed:
        raise SystemExit(f"[SAFETY] {msg}")
    print(f"[SAFETY] {msg}")
    # 1. Collect API keys (env var or .env); abort with guidance if none.
    api_keys = get_all_api_keys()
    if not api_keys:
        print("=" * 60)
        print(" ERRO: Nenhuma GEMINI_API_KEY encontrada!")
        print("=" * 60)
        print()
        print(" Configure de uma dessas formas:")
        print(" 1. Variavel de ambiente: set GEMINI_API_KEY=sua-key")
        print(" 2. Arquivo .env em: C:\\Users\\renat\\skills\\ai-studio-image\\")
        print()
        print(" Obtenha sua key em: https://aistudio.google.com/apikey")
        sys.exit(1)
    # 2. Resolve the format name (aliases such as "4:5" are accepted);
    #    fall back to the default for unknown names.
    format_name = resolve_format(format_name)
    if format_name not in IMAGE_FORMATS:
        format_name = DEFAULT_FORMAT
    # 3. Humanize the prompt, unless the caller wants it sent verbatim.
    if skip_humanization:
        final_prompt = prompt
    else:
        final_prompt = humanize_prompt(
            user_prompt=prompt,
            mode=mode,
            humanization=humanization,
            lighting=lighting,
            template_context=template_context,
            shot_type=shot_type,
            resolution=resolution,
        )
    # 4. Resolve model/format configuration and the output directory.
    model_config = MODELS.get(model_name, MODELS[DEFAULT_MODEL])
    format_config = IMAGE_FORMATS[format_name]
    aspect_ratio = format_config["aspect_ratio"]
    if output_dir is None:
        output_dir = OUTPUTS_DIR
    output_dir.mkdir(parents=True, exist_ok=True)
    # Clamp the request to the model's per-call image limit.
    num_images = min(num_images, model_config["max_images"])
    print("=" * 60)
    print(" AI STUDIO IMAGE — Gerando Imagem Humanizada")
    print("=" * 60)
    print(f" Modelo: {model_config['id']}")
    print(f" Tipo: {model_config['type']}")
    print(f" Modo: {mode}")
    print(f" Formato: {format_name} ({aspect_ratio})")
    print(f" Humanizacao: {humanization}")
    print(f" Resolucao: {resolution}")
    print(f" Imagens: {num_images}")
    if lighting:
        print(f" Iluminacao: {lighting}")
    if reference_images:
        print(f" Referencias: {len(reference_images)} imagem(ns)")
    print(f" Output: {output_dir}")
    print("=" * 60)
    print()
    # 5. Generate, falling back across API keys; on rate limits, retry the
    #    whole key list up to max_retries times with a capped backoff.
    images = []
    used_key_index = 0
    start_time = time.time()
    max_retries = 3
    retry_delay = 15  # seconds
    for attempt in range(max_retries):
        for i, api_key in enumerate(api_keys):
            try:
                if model_config["type"] == "imagen":
                    images = generate_with_imagen(
                        prompt=final_prompt,
                        model_id=model_config["id"],
                        aspect_ratio=aspect_ratio,
                        num_images=num_images,
                        api_key=api_key,
                        resolution=resolution,
                        person_generation=person_generation,
                    )
                else:
                    images = generate_with_gemini(
                        prompt=final_prompt,
                        model_id=model_config["id"],
                        aspect_ratio=aspect_ratio,
                        api_key=api_key,
                        resolution=resolution,
                        reference_images=reference_images,
                    )
                if images:
                    used_key_index = i
                    break
            except Exception as e:
                error_msg = str(e)
                is_rate_limit = "429" in error_msg or "RESOURCE_EXHAUSTED" in error_msg
                is_last_key = i >= len(api_keys) - 1
                if not is_last_key:
                    # More keys to try — move on to the next backup key.
                    print(f" Key {i+1} falhou ({error_msg[:60]}...), tentando backup...")
                    continue
                elif is_rate_limit and attempt < max_retries - 1:
                    # Honor the server-suggested retry delay when present.
                    delay_match = re.search(r'retryDelay.*?(\d+)', error_msg)
                    wait_time = int(delay_match.group(1)) if delay_match else retry_delay
                    wait_time = min(wait_time + 5, 60)  # cap at 60s
                    print(f" Rate limit atingido. Aguardando {wait_time}s (tentativa {attempt+1}/{max_retries})...")
                    time.sleep(wait_time)
                    break  # Break inner loop to retry all keys
                else:
                    # Last key, out of retries (or non-retryable error): give up.
                    print(f"\n ERRO: Todas as tentativas falharam.")
                    print(f" Ultimo erro: {error_msg[:200]}")
                    print()
                    if is_rate_limit:
                        print(" Rate limit esgotado. Sugestoes:")
                        print(" - Aguarde alguns minutos e tente novamente")
                        print(" - Habilite billing no Google Cloud para limites maiores")
                        print(" - Use um modelo diferente (--model imagen-4-fast)")
                    else:
                        print(" Dicas:")
                        print(" - Verifique se a API key e valida")
                        print(" - O prompt pode conter conteudo restrito")
                        print(" - Tente simplificar o prompt")
                        print(" - Verifique: https://aistudio.google.com/")
                    return []
        if images:
            break
    elapsed = time.time() - start_time
    if not images:
        print("\n Nenhuma imagem gerada. Verifique o prompt e tente novamente.")
        return []
    # 6. Save images plus a complete metadata record shared by the batch.
    metadata = {
        "original_prompt": prompt,
        "humanized_prompt": final_prompt,
        "mode": mode,
        "format": format_name,
        "aspect_ratio": aspect_ratio,
        "humanization": humanization,
        "lighting": lighting,
        "shot_type": shot_type,
        "model": model_config["id"],
        "model_name": model_name,
        "model_type": model_config["type"],
        "resolution": resolution,
        "person_generation": person_generation,
        "template": template,
        "num_images_requested": num_images,
        "num_images_generated": len(images),
        "generation_time_seconds": round(elapsed, 2),
        "api_key_index": used_key_index,
        "generated_at": datetime.now().isoformat(),
        "reference_images": [str(p) for p in (reference_images or [])],
    }
    saved_paths = []
    for idx, img_data in enumerate(images):
        filepath = save_image(
            image_data=img_data,
            output_dir=output_dir,
            mode=mode,
            template=template,
            index=idx,
            metadata=metadata,
        )
        saved_paths.append(filepath)
        print(f" Salvo: {filepath}")
    print(f"\n {len(saved_paths)} imagem(ns) gerada(s) em {elapsed:.1f}s")
    # Keep the humanized prompt on disk for later reference.
    if OUTPUT_SETTINGS["save_prompt"]:
        prompt_file = output_dir / f"last_prompt_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        content = f"ORIGINAL:\n{prompt}\n\nHUMANIZED:\n{final_prompt}"
        prompt_file.write_text(content, encoding="utf-8")
    return saved_paths
# =============================================================================
# CLI
# =============================================================================
def main():
    """CLI entry point: parse arguments, serve the utility sub-modes
    (--list-models, --list-formats, --analyze), and otherwise dispatch
    to generate()."""
    parser = argparse.ArgumentParser(
        description="Gerar imagens humanizadas via Google AI Studio",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exemplos:
python generate.py --prompt "mulher tomando cafe" --mode influencer
python generate.py --prompt "professor explicando" --mode educacional --format widescreen
python generate.py --template cafe-lifestyle --custom "ruiva, 25 anos"
python generate.py --prompt "produto na mesa" --model imagen-4-ultra --resolution 2K
python generate.py --prompt "paisagem" --format ultrawide --lighting golden-hour
""",
    )
    # Prompt or template selection (one of the two is required later).
    parser.add_argument("--prompt", help="Descricao da imagem desejada")
    parser.add_argument("--template", help="Nome do template pre-configurado")
    parser.add_argument("--custom", help="Personalizacao sobre o template")
    # Main generation settings.
    parser.add_argument("--mode", default=DEFAULT_MODE,
                        choices=["influencer", "educacional"])
    parser.add_argument("--format", default=DEFAULT_FORMAT,
                        help="Formato (square, portrait, landscape, stories, widescreen, ultrawide, "
                        "ou aspect ratio como 4:5, 16:9, etc)")
    parser.add_argument("--humanization", default=DEFAULT_HUMANIZATION,
                        choices=["ultra", "natural", "polished", "editorial"])
    parser.add_argument("--lighting",
                        choices=["morning", "golden-hour", "midday", "overcast",
                                 "night", "indoor", "blue-hour", "shade"])
    parser.add_argument("--shot-type",
                        help="Tipo de enquadramento (close-up, medium, wide, etc)")
    # Model and quality.
    parser.add_argument("--model", default=DEFAULT_MODEL,
                        choices=list(MODELS.keys()),
                        help=f"Modelo (default: {DEFAULT_MODEL})")
    parser.add_argument("--resolution", default=DEFAULT_RESOLUTION,
                        choices=["1K", "2K", "4K"])
    parser.add_argument("--variations", type=int, default=1,
                        help="Numero de variacoes (1-4)")
    # Advanced options.
    parser.add_argument("--reference-images", nargs="+", type=Path,
                        help="Imagens de referencia (apenas Gemini Pro Image)")
    parser.add_argument("--person-generation", default=DEFAULT_PERSON_GENERATION,
                        choices=["dont_allow", "allow_adult", "allow_all"])
    parser.add_argument("--skip-humanization", action="store_true",
                        help="Enviar prompt diretamente sem humanizacao")
    parser.add_argument("--force-paid", action="store_true",
                        help="Permite usar modelos com custo (imagen-4, etc). USE COM CUIDADO.")
    # Output location.
    parser.add_argument("--output", type=Path, help="Diretorio de saida customizado")
    # Utility modes (each returns without generating).
    parser.add_argument("--analyze", action="store_true",
                        help="Apenas analisa o prompt e sugere configuracoes")
    parser.add_argument("--list-models", action="store_true",
                        help="Lista todos os modelos disponiveis")
    parser.add_argument("--list-formats", action="store_true",
                        help="Lista todos os formatos disponiveis")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()
    # List available models and exit.
    if args.list_models:
        print("\nModelos disponiveis:\n")
        for name, cfg in MODELS.items():
            print(f" {name:25s} {cfg['description']}")
            print(f" {'':25s} ID: {cfg['id']}")
            print(f" {'':25s} Max imagens: {cfg['max_images']} | "
                  f"Max res: {cfg.get('max_resolution', 'N/A')}")
            print()
        return
    # List formats and accepted aliases, then exit.
    if args.list_formats:
        print("\nFormatos disponiveis:\n")
        for name, cfg in IMAGE_FORMATS.items():
            print(f" {name:20s} {cfg['aspect_ratio']:8s} {cfg['description']}")
        print("\nAliases aceitos:\n")
        for alias, target in sorted(FORMAT_ALIASES.items()):
            if alias != target:
                print(f" {alias:25s} -> {target}")
        return
    # Analysis-only mode: print suggested settings without generating.
    if args.analyze:
        if not args.prompt:
            print("ERRO: --prompt obrigatorio com --analyze")
            sys.exit(1)
        analysis = analyze_prompt(args.prompt)
        if args.json:
            print(json.dumps(analysis, indent=2, ensure_ascii=False))
        else:
            print("\nAnalise do prompt:\n")
            for k, v in analysis.items():
                if k != "analysis":
                    print(f" {k:20s} {v or 'auto'}")
        return
    # Resolve the prompt: a template (with optional customization) or --prompt.
    template_context = None
    if args.template:
        from templates import get_template
        tmpl = get_template(args.template)
        if not tmpl:
            print(f"ERRO: Template '{args.template}' nao encontrado")
            print("Use: python templates.py --list")
            sys.exit(1)
        prompt = tmpl["prompt"]
        if args.custom:
            prompt = f"{prompt}. Additional specific details: {args.custom}"
        template_context = tmpl.get("context", "")
        # Template suggestions only override values the user left at defaults.
        if args.mode == DEFAULT_MODE and "mode" in tmpl:
            args.mode = tmpl["mode"]
        if args.format == DEFAULT_FORMAT and "suggested_format" in tmpl:
            args.format = tmpl["suggested_format"]
        if not args.lighting and "suggested_lighting" in tmpl:
            args.lighting = tmpl["suggested_lighting"]
        if args.humanization == DEFAULT_HUMANIZATION and "suggested_humanization" in tmpl:
            args.humanization = tmpl["suggested_humanization"]
    elif args.prompt:
        prompt = args.prompt
    else:
        print("ERRO: Forneca --prompt ou --template")
        print("Use --help para ver todas as opcoes")
        sys.exit(1)
    _check_dependencies()
    # Generate and collect the written paths.
    paths = generate(
        prompt=prompt,
        mode=args.mode,
        format_name=args.format,
        humanization=args.humanization,
        lighting=args.lighting,
        model_name=args.model,
        num_images=args.variations,
        template=args.template or "custom",
        template_context=template_context,
        output_dir=args.output,
        skip_humanization=args.skip_humanization,
        resolution=args.resolution,
        person_generation=args.person_generation,
        reference_images=args.reference_images,
        shot_type=args.shot_type,
        force_paid=args.force_paid,
    )
    # Machine-readable summary for callers that requested --json.
    if args.json and paths:
        result = {
            "generated": [str(p) for p in paths],
            "count": len(paths),
            "output_dir": str(paths[0].parent) if paths else None,
        }
        print(json.dumps(result, indent=2, ensure_ascii=False))
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()