.agents/skills/ai-video-director/scripts/generate-image.sh

#!/usr/bin/env bash
# Generate an image with Nano Banana Pro (gemini-3-pro-image-preview).
# Supports text-to-image and image+text editing with reference images.
#
# Usage:
#   generate-image.sh <prompt> <output> [ref1.png ref2.png ...]
#
# Env: GOOGLE_AI_API_KEY must be set, or defined in $AI_STUDIO_DIR/.env.local
#      (AI_STUDIO_DIR defaults to /mnt/work/dev/ai-studio-videos).
set -euo pipefail

# Fail with a usage message instead of a bare "unbound variable" error when
# the two required positional arguments are missing.
if (( $# < 2 )); then
  echo "Usage: ${0##*/} <prompt> <output> [ref1.png ref2.png ...]" >&2
  exit 1
fi

PROMPT="$1"
OUTPUT="$2"
shift 2
# Any remaining arguments (possibly none) are reference images.
REF_IMAGES=("$@")

# Directory containing this script. Not referenced further in this script.
# shellcheck disable=SC2034
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Project root; overridable through AI_STUDIO_DIR.
PROJECT_DIR="${AI_STUDIO_DIR:-/mnt/work/dev/ai-studio-videos}"

# Source the API key from the project's .env.local if it is not already set.
if [[ -z "${GOOGLE_AI_API_KEY:-}" ]] && [[ -f "$PROJECT_DIR/.env.local" ]]; then
  # shellcheck disable=SC1091
  source "$PROJECT_DIR/.env.local"
fi

# Stop before doing any work if there is still no key; otherwise the failure
# would only surface once the request is about to be sent.
if [[ -z "${GOOGLE_AI_API_KEY:-}" ]]; then
  echo "GOOGLE_AI_API_KEY is not set (also looked in $PROJECT_DIR/.env.local)" >&2
  exit 1
fi

# Scratch directory for intermediate files; removed on every exit path.
TMPDIR=$(mktemp -d)
trap 'rm -rf -- "${TMPDIR:?}"' EXIT

# Make sure the destination directory for the output image exists.
mkdir -p -- "$(dirname -- "$OUTPUT")"

# Build the request body in "$TMPDIR/request.json".
#
# Every part is written to its own JSON file (reference images first, then the
# text prompt) and all files are combined in one jq pass, so the accumulated
# base64 data is not re-read and rewritten once per image.
part_files=()
part_idx=0

# ${arr[@]+"${arr[@]}"} guards the expansion: under `set -u`, bash < 4.4
# treats an empty array as unset, which would abort text-only invocations.
for img in ${REF_IMAGES[@]+"${REF_IMAGES[@]}"}; do
  if [[ ! -f "$img" || ! -r "$img" ]]; then
    echo "Reference image not found or unreadable: $img" >&2
    exit 1
  fi

  mime=$(file -b --mime-type -- "$img")

  # Index-based names keep the scratch files independent of the image names.
  b64f="$TMPDIR/ref_${part_idx}.b64"
  # Strip line wrapping ourselves instead of relying on GNU-only `base64 -w0`.
  base64 < "$img" | tr -d '\n' > "$b64f"

  part_file="$TMPDIR/part_${part_idx}.json"
  # --rawfile keeps the (potentially large) base64 data off the command line.
  jq -n --rawfile b64 "$b64f" --arg mime "$mime" \
    '{"inline_data": {"mime_type": $mime, "data": $b64}}' > "$part_file"

  part_files+=("$part_file")
  part_idx=$((part_idx + 1))
done

# The text prompt is always the last part.
part_file="$TMPDIR/part_text.json"
jq -n --arg p "$PROMPT" '{"text": $p}' > "$part_file"
part_files+=("$part_file")

# Slurp the part files, in order, into the parts array and wrap the request.
jq -s \
  '{
    contents: [{parts: .}],
    generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
  }' "${part_files[@]}" > "$TMPDIR/request.json"

echo "Generating image (${#REF_IMAGES[@]} refs, $(du -h "$TMPDIR/request.json" | cut -f1) payload)..."

# Hand the API key to curl through a header file inside the mktemp directory
# rather than on the command line, where it would show up in process listings.
# printf is a shell builtin, so the key never appears in any argv.
# NOTE: `-H @file` needs curl >= 7.55.0.
HEADER_FILE="$TMPDIR/headers.txt"
printf 'x-goog-api-key: %s\n' "$GOOGLE_AI_API_KEY" > "$HEADER_FILE"

# -sS            no progress meter, but transport errors are still printed
# --data-binary  send the JSON file byte-for-byte (-d strips CR/LF from files)
# --fail is deliberately not used: the body of an HTTP error response is kept
# in RESPONSE so it can still be inspected after the call.
RESPONSE=$(curl -sS -X POST \
  "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent" \
  -H @"$HEADER_FILE" \
  -H "Content-Type: application/json" \
  --data-binary @"$TMPDIR/request.json") || {
  rc=$?
  echo "Request failed (curl exit code $rc)." >&2
  exit "$rc"
}

# --- Response handling -------------------------------------------------------
# RESPONSE holds the raw body returned by the API call. Every check below
# parses it with jq, so verify first that it is JSON at all and show the start
# of the body if it is not (instead of a bare jq parse error).
if ! printf '%s' "$RESPONSE" | jq empty 2>/dev/null; then
  echo "API returned a non-JSON response:" >&2
  printf '%s\n' "${RESPONSE:0:500}" >&2
  exit 1
fi

# API-level error (e.g. bad key, invalid request).
ERROR=$(printf '%s' "$RESPONSE" | jq -r '.error.message // empty')
if [[ -n "$ERROR" ]]; then
  echo "API Error: $ERROR" >&2; exit 1
fi

# Prompt rejected before generation.
BLOCK=$(printf '%s' "$RESPONSE" | jq -r '.promptFeedback.blockReason // empty')
if [[ -n "$BLOCK" ]]; then
  echo "Blocked: $BLOCK" >&2; exit 1
fi

# Print any text parts the model returned alongside the image.
TEXT=$(printf '%s' "$RESPONSE" | jq -r \
  '(.candidates[0].content.parts // [])[] | select(.text) | .text // empty')
if [[ -n "$TEXT" ]]; then
  echo "Model: $TEXT"
fi

# Extract the FIRST image part only. Emitting every inlineData payload would
# feed several concatenated base64 streams into one decode and corrupt the
# output file whenever the model returns more than one image.
IMG=$(printf '%s' "$RESPONSE" | jq -r \
  '[(.candidates[0].content.parts // [])[] | select(.inlineData) | .inlineData.data // empty] | .[0] // empty')

if [[ -n "$IMG" ]]; then
  # Decode into the scratch directory first so that a failed decode never
  # leaves a truncated file at $OUTPUT.
  decoded="$TMPDIR/image.decoded"
  if ! printf '%s' "$IMG" | base64 -d > "$decoded"; then
    echo "Failed to decode image data." >&2
    exit 1
  fi
  cat -- "$decoded" > "$OUTPUT"
  echo "Saved: $OUTPUT"
else
  echo "No image returned." >&2
  echo "Finish reason: $(printf '%s' "$RESPONSE" | jq -r '.candidates[0].finishReason // "unknown"')" >&2
  exit 1
fi