126 lines
3.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# Generate a video with Veo 3.1 using first and last frame interpolation.
# Inlines both frames as base64 in a predictLongRunning request, polls the
# returned operation until it is done, then downloads the resulting video.
#
# Usage:
#   generate-video.sh <prompt> <first-frame> <last-frame> [output.mp4] [duration]
#
# Arguments:
#   prompt       Text prompt sent with the request.
#   first-frame  Image file used as the first frame.
#   last-frame   Image file used as the last frame.
#   output.mp4   Destination path (default: output/video.mp4).
#   duration     Clip length in seconds (default: 8).
#
# Env: GOOGLE_AI_API_KEY must be set (or sourced from .env.local)
#      AI_STUDIO_DIR optionally overrides the directory holding .env.local.
set -euo pipefail

# Print an error to stderr and abort.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Print the usage line to stderr and exit with the conventional status 2.
usage() {
  printf 'Usage: %s <prompt> <first-frame> <last-frame> [output.mp4] [duration]\n' \
    "${0##*/}" >&2
  exit 2
}

# Fail with a usage message instead of a bare "unbound variable" from set -u.
(( $# >= 3 )) || usage

PROMPT="$1"
FIRST_FRAME="$2"
LAST_FRAME="$3"
OUTPUT="${4:-output/video.mp4}"
DURATION="${5:-8}"
PROJECT_DIR="${AI_STUDIO_DIR:-/mnt/work/dev/ai-studio-videos}"

# Only fall back to .env.local when the key is not already in the environment.
if [[ -z "${GOOGLE_AI_API_KEY:-}" ]] && [[ -f "$PROJECT_DIR/.env.local" ]]; then
  # shellcheck source=/dev/null
  source "$PROJECT_DIR/.env.local"
fi

# Validate everything up front so no encoding work is done for a doomed run.
[[ -n "${GOOGLE_AI_API_KEY:-}" ]] \
  || die "GOOGLE_AI_API_KEY is not set (export it or add it to $PROJECT_DIR/.env.local)"
[[ -f "$FIRST_FRAME" && -r "$FIRST_FRAME" ]] \
  || die "First frame is not a readable file: $FIRST_FRAME"
[[ -f "$LAST_FRAME" && -r "$LAST_FRAME" ]] \
  || die "Last frame is not a readable file: $LAST_FRAME"
# The duration is later converted with jq's tonumber, so it must be numeric.
[[ "$DURATION" =~ ^[0-9]+([.][0-9]+)?$ ]] \
  || die "Duration must be a number of seconds, got: $DURATION"

for tool in curl jq file base64; do
  command -v "$tool" >/dev/null 2>&1 || die "Required tool not found: $tool"
done

BASE_URL="https://generativelanguage.googleapis.com/v1beta"

# NOTE: this reuses the conventional TMPDIR variable name for the scratch
# directory; everything written there is removed on any exit path.
TMPDIR=$(mktemp -d) || die "Could not create a temporary directory"
trap 'rm -rf -- "$TMPDIR"' EXIT

mkdir -p -- "$(dirname -- "$OUTPUT")"

# MIME types are sent alongside the image bytes in the request body.
FIRST_MIME=$(file -b --mime-type -- "$FIRST_FRAME")
LAST_MIME=$(file -b --mime-type -- "$LAST_FRAME")
# Encode both frames and assemble the JSON request body in $TMPDIR.
echo "Encoding frames..."
# `base64 -w0` is GNU-only; reading from stdin and stripping newlines yields
# the same single-line output on both GNU and BSD/macOS implementations.
base64 < "$FIRST_FRAME" | tr -d '\n' > "$TMPDIR/first.b64"
base64 < "$LAST_FRAME" | tr -d '\n' > "$TMPDIR/last.b64"

echo "Building request..."
echo " Duration: ${DURATION}s"
echo " First: $FIRST_FRAME ($FIRST_MIME)"
echo " Last: $LAST_FRAME ($LAST_MIME)"

# Veo predictLongRunning uses bytesBase64Encoded (not inlineData)
# lastFrame goes inside instances[0] (not parameters)
# The base64 payloads are passed with --rawfile rather than --arg so large
# images never hit the argv length limit.
jq -n \
  --arg prompt "$PROMPT" \
  --rawfile first_b64 "$TMPDIR/first.b64" \
  --arg first_mime "$FIRST_MIME" \
  --rawfile last_b64 "$TMPDIR/last.b64" \
  --arg last_mime "$LAST_MIME" \
  --arg duration "$DURATION" \
  '{
    instances: [{
      prompt: $prompt,
      image: {
        bytesBase64Encoded: $first_b64,
        mimeType: $first_mime
      },
      lastFrame: {
        bytesBase64Encoded: $last_b64,
        mimeType: $last_mime
      }
    }],
    parameters: {
      durationSeconds: ($duration | tonumber),
      aspectRatio: "16:9"
    }
  }' > "$TMPDIR/request.json"

echo " Payload: $(du -h "$TMPDIR/request.json" | cut -f1)"
# Submit the request and extract the long-running operation name.
echo "Submitting to Veo 3.1..."
# -S keeps transport errors visible despite -s. --fail is deliberately not
# used so the JSON error body of a 4xx/5xx reply can still be parsed below.
# --data-binary sends the payload unmodified (-d strips CR/LF from files).
if ! OPERATION=$(curl -sS \
  "${BASE_URL}/models/veo-3.1-generate-preview:predictLongRunning" \
  -H "x-goog-api-key: ${GOOGLE_AI_API_KEY}" \
  -H "Content-Type: application/json" \
  -X POST \
  --data-binary @"$TMPDIR/request.json"); then
  echo "Request to the Veo API failed (network/transport error)." >&2
  exit 1
fi

# Guard against proxies or gateways answering with HTML/plain text.
if ! jq empty <<<"$OPERATION" 2>/dev/null; then
  echo "Unexpected non-JSON response:" >&2
  printf '%s\n' "$OPERATION" >&2
  exit 1
fi

# The `?` suffixes make the lookups yield nothing (instead of a jq error)
# when the response is valid JSON but not an object.
ERROR=$(jq -r '.error?.message? // empty' <<<"$OPERATION")
if [[ -n "$ERROR" ]]; then
  echo "API Error: $ERROR" >&2
  exit 1
fi

OPERATION_NAME=$(jq -r '.name? // empty' <<<"$OPERATION")
if [[ -z "$OPERATION_NAME" || "$OPERATION_NAME" == "null" ]]; then
  echo "No operation name returned." >&2
  jq . <<<"$OPERATION" >&2
  exit 1
fi
echo "Operation: $OPERATION_NAME"
# Poll the operation every 10s until it reports done, then download the video.
# VEO_POLL_TIMEOUT (seconds, default 1800) bounds the total wait so a stuck
# operation cannot keep the script running forever.
echo "Polling..."
POLL_TIMEOUT="${VEO_POLL_TIMEOUT:-1800}"
SECONDS=0 # bash increments this automatically; used as the elapsed-time clock
while true; do
  # A single failed poll (network blip) should not abort a long render; treat
  # it as "not done yet" and let the timeout below bound the retries.
  if ! STATUS=$(curl -sS \
    -H "x-goog-api-key: ${GOOGLE_AI_API_KEY}" \
    "${BASE_URL}/${OPERATION_NAME}"); then
    echo " Poll request failed; will retry." >&2
    STATUS='{}'
  fi

  # Non-JSON replies (e.g. a gateway error page) also count as "not done".
  IS_DONE=$(jq -r '.done? // false' <<<"$STATUS" 2>/dev/null) || IS_DONE=false

  if [[ "$IS_DONE" == "true" ]]; then
    OP_ERROR=$(jq -r '.error.message // empty' <<<"$STATUS")
    if [[ -n "$OP_ERROR" ]]; then
      echo "Generation failed: $OP_ERROR" >&2
      exit 1
    fi

    VIDEO_URI=$(jq -r \
      '.response.generateVideoResponse.generatedSamples[0].video.uri' \
      <<<"$STATUS")
    if [[ -z "$VIDEO_URI" || "$VIDEO_URI" == "null" ]]; then
      echo "No video URI." >&2
      jq . <<<"$STATUS" >&2
      exit 1
    fi

    echo "Downloading..."
    # Download to a sibling .part file and rename on success so a failed or
    # partial transfer never leaves a bogus file at $OUTPUT. --fail makes
    # HTTP errors a curl failure instead of saving the error body as video.
    PARTIAL="${OUTPUT}.part"
    if ! curl -sSL --fail -o "$PARTIAL" \
      -H "x-goog-api-key: ${GOOGLE_AI_API_KEY}" \
      "$VIDEO_URI"; then
      rm -f -- "$PARTIAL"
      echo "Download failed: $VIDEO_URI" >&2
      exit 1
    fi
    if [[ ! -s "$PARTIAL" ]]; then
      rm -f -- "$PARTIAL"
      echo "Download produced an empty file." >&2
      exit 1
    fi
    mv -f -- "$PARTIAL" "$OUTPUT"

    echo "Saved: $OUTPUT ($(du -h "$OUTPUT" | cut -f1))"
    break
  fi

  if (( SECONDS >= POLL_TIMEOUT )); then
    echo "Timed out after ${POLL_TIMEOUT}s waiting for ${OPERATION_NAME}." >&2
    exit 1
  fi

  echo " Rendering... (10s)"
  sleep 10
done