92 lines
2.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Generate an image with Nano Banana Pro (gemini-3-pro-image-preview).
# Supports text-to-image and image+text editing with reference images.
#
# Usage:
# generate-image.sh <prompt> <output> [ref1.png ref2.png ...]
#
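# Env:
# GOOGLE_AI_API_KEY  API key; if unset, it is sourced from $AI_STUDIO_DIR/.env.local
# AI_STUDIO_DIR      directory containing .env.local
#                    (default: /mnt/work/dev/ai-studio-videos)
#
# Requires: curl, jq, file, base64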
set -euo pipefail

# Positional arguments: <prompt> <output> [reference images...].
# Checked explicitly because under `set -u` a missing argument would otherwise
# abort with a bare "unbound variable" message and no usage hint.
if (( $# < 2 )); then
  printf 'Usage: %s <prompt> <output> [ref1.png ref2.png ...]\n' "${0##*/}" >&2
  exit 2
fi

PROMPT=$1
OUTPUT=$2
shift 2
# Everything after the first two arguments is a reference image path.
REF_IMAGES=("$@")

# Directory that holds .env.local; override with AI_STUDIO_DIR.
PROJECT_DIR="${AI_STUDIO_DIR:-/mnt/work/dev/ai-studio-videos}"

# Source the API key from .env.local only when the caller did not provide one.
if [[ -z "${GOOGLE_AI_API_KEY:-}" && -f "$PROJECT_DIR/.env.local" ]]; then
  # shellcheck source=/dev/null
  source "$PROJECT_DIR/.env.local"
fi

# Fail before any payload is built if there is still no key to send.
if [[ -z "${GOOGLE_AI_API_KEY:-}" ]]; then
  printf 'Error: GOOGLE_AI_API_KEY is not set (also checked %s/.env.local)\n' \
    "$PROJECT_DIR" >&2
  exit 1
fi
# Scratch directory for the request payload; removed on every exit path.
# NOTE(review): this assignment reuses the conventional TMPDIR variable name;
# it is kept because later steps read "$TMPDIR/request.json".
TMPDIR=$(mktemp -d)
trap 'rm -rf -- "$TMPDIR"' EXIT

# Make sure the destination directory for the output image exists.
mkdir -p -- "$(dirname -- "$OUTPUT")"

# Write one {"inline_data": ...} object per reference image to a JSON stream.
# Appending to a stream avoids re-reading and re-writing the whole (growing)
# base64 payload once per image.
: > "$TMPDIR/parts.jsonl"
# The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
# bash releases older than 4.4.
for img in ${REF_IMAGES[@]+"${REF_IMAGES[@]}"}; do
  if [[ ! -f "$img" || ! -r "$img" ]]; then
    printf 'Reference image not found or unreadable: %s\n' "$img" >&2
    exit 1
  fi
  mime=$(file -b --mime-type -- "$img")
  # Read via redirection so a file name starting with '-' is never parsed
  # as an option; -w0 keeps the base64 on a single line.
  base64 -w0 < "$img" > "$TMPDIR/image.b64"
  # --rawfile keeps the (potentially large) base64 text off the command line.
  jq -c -n --rawfile b64 "$TMPDIR/image.b64" --arg mime "$mime" \
    '{inline_data: {mime_type: $mime, data: $b64}}' >> "$TMPDIR/parts.jsonl"
done

# Assemble the request: image parts first, then the text prompt.
# --slurpfile binds $parts to an array of every object in the stream
# (an empty array when there are no reference images).
jq -n --arg p "$PROMPT" --slurpfile parts "$TMPDIR/parts.jsonl" \
  '{
    contents: [{parts: ($parts + [{text: $p}])}],
    generationConfig: {responseModalities: ["TEXT", "IMAGE"]}
  }' > "$TMPDIR/request.json"
echo "Generating image (${#REF_IMAGES[@]} refs, $(du -h "$TMPDIR/request.json" | cut -f1) payload)..."

# Hand the API key to curl through a header file inside the private scratch
# directory instead of on the command line, where `ps` would expose it.
# printf is a shell builtin, so the key never appears in any process argv.
printf 'x-goog-api-key: %s\n' \
  "${GOOGLE_AI_API_KEY:?GOOGLE_AI_API_KEY must be set}" > "$TMPDIR/headers.txt"

# -sS: no progress meter, but transport errors are still printed.
# --fail is deliberately not used: on HTTP errors the API returns a JSON body
# whose .error.message is reported by the response handling that follows.
# --data-binary sends the JSON file exactly as written.
if ! RESPONSE=$(curl -sS -X POST \
  "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent" \
  -H @"$TMPDIR/headers.txt" \
  -H "Content-Type: application/json" \
  --data-binary @"$TMPDIR/request.json"); then
  echo "Request failed: no response received from the API." >&2
  exit 1
fi
# Reject anything that is not a JSON object (empty body, proxy error page)
# up front, so the jq queries below cannot abort the script with a bare
# parse error.
if ! jq -e 'type == "object"' >/dev/null 2>&1 <<<"$RESPONSE"; then
  echo "Unexpected response from API (not a JSON object):" >&2
  # Show only the beginning of the body; it may be large.
  printf '%s\n' "${RESPONSE:0:500}" >&2
  exit 1
fi

# API-level error (bad key, quota, malformed request, ...).
ERROR=$(jq -r '.error.message // empty' <<<"$RESPONSE")
if [[ -n "$ERROR" ]]; then
  echo "API Error: $ERROR" >&2
  exit 1
fi

# Prompt rejected before generation.
BLOCK=$(jq -r '.promptFeedback.blockReason // empty' <<<"$RESPONSE")
if [[ -n "$BLOCK" ]]; then
  echo "Blocked: $BLOCK" >&2
  exit 1
fi

# Print any text parts the model returned alongside the image.
TEXT=$(jq -r \
  '(.candidates[0].content.parts // [])[] | select(.text) | .text // empty' \
  <<<"$RESPONSE")
if [[ -n "$TEXT" ]]; then
  echo "Model: $TEXT"
fi

# Extract the first image part only: decoding several base64 payloads into
# one output file would not produce a single usable image.
IMG=$(jq -r \
  'first((.candidates[0].content.parts // [])[] | select(.inlineData) | .inlineData.data // empty)' \
  <<<"$RESPONSE")
if [[ -n "$IMG" ]]; then
  printf '%s' "$IMG" | base64 -d > "$OUTPUT"
  echo "Saved: $OUTPUT"
else
  echo "No image returned." >&2
  echo "Finish reason: $(jq -r '.candidates[0].finishReason // "unknown"' <<<"$RESPONSE")" >&2
  exit 1
fi