.agents/scripts/speak.sh

41 lines
1.1 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# speak.sh - send text to an audio-capable chat model on OpenRouter and play
# the audio that comes back.
#
# Usage: speak.sh <text to speak>
#
# Environment:
#   VOICE   voice name placed in the request (default: ash)
#   FORMAT  audio format placed in the request (default: wav)
#
# Requires: signet (queried for OPENROUTER_API_KEY), curl >= 7.55, jq,
#           base64, ffplay.
#
# Exit status: ffplay's status after playback; 1 on a usage error, a missing
# tool or secret, a failed request, or a response that carries no audio.
set -euo pipefail

readonly API_URL="https://openrouter.ai/api/v1/chat/completions"
readonly MODEL="openai/gpt-audio-mini"

VOICE="${VOICE:-ash}"
FORMAT="${FORMAT:-wav}"

# Scratch directory for the decoded audio; empty until main creates it.
WORK_DIR=""

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# EXIT trap handler: remove the scratch directory if one was created.
cleanup() {
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}
trap cleanup EXIT

#######################################
# Build the JSON request body.
# Globals:   MODEL, VOICE, FORMAT (read)
# Arguments: $1 - text to speak
# Outputs:   JSON document on stdout
#######################################
build_payload() {
  # jq --arg handles all JSON escaping of the user-supplied strings.
  jq -n \
    --arg model "$MODEL" \
    --arg text "$1" \
    --arg voice "$VOICE" \
    --arg fmt "$FORMAT" \
    '{
      model: $model,
      modalities: ["text", "audio"],
      audio: { voice: $voice, format: $fmt },
      messages: [{ role: "user", content: $text }]
    }'
}

#######################################
# POST the request body to the API.
# Globals:   API_URL (read)
# Arguments: $1 - API key, $2 - JSON request body
# Outputs:   raw response body on stdout
# Returns:   non-zero if curl could not complete the transfer
#######################################
request_audio() {
  local api_key=$1
  local payload=$2
  # The Authorization header is fed through stdin (-H @-) so the secret never
  # appears in curl's argv; printf is a builtin, so it spawns no process
  # either. -S keeps transport errors visible despite -s. --fail is left out
  # on purpose: the body of an HTTP error response is what the caller reports.
  printf 'Authorization: Bearer %s\n' "$api_key" \
    | curl -sS "$API_URL" \
      -H @- \
      -H "Content-Type: application/json" \
      -d "$payload"
}

#######################################
# Entry point: validate input, call the API, decode and play the audio.
# Arguments: the words to speak (joined with spaces)
#######################################
main() {
  local text="$*"
  # Validate arguments before touching the secret store or the network.
  [[ -n "$text" ]] || die "Usage: speak.sh <text to speak>"

  # Fail early, before making the API call, if any tool is missing.
  local tool
  for tool in signet curl jq base64 ffplay; do
    command -v "$tool" >/dev/null 2>&1 \
      || die "Error: required command not found: $tool"
  done

  local api_key
  api_key=$(signet secret get OPENROUTER_API_KEY 2>/dev/null) \
    || die "Error: could not read OPENROUTER_API_KEY from signet"
  [[ -n "$api_key" ]] || die "Error: OPENROUTER_API_KEY is empty"

  local payload response
  payload=$(build_payload "$text") \
    || die "Error: could not build request body"
  response=$(request_audio "$api_key" "$payload") \
    || die "Error: request to $API_URL failed"

  # Extract the base64 audio; "// empty" yields nothing when the field is
  # absent, so only unparseable input makes jq fail here.
  local audio_data
  audio_data=$(jq -r '.choices[0].message.audio.data // empty' <<<"$response") \
    || die "Error: response is not valid JSON"

  if [[ -z "$audio_data" ]]; then
    printf '%s\n' "Error: No audio in response" >&2
    # Show the most specific detail available, falling back to the raw body.
    {
      jq '.error // .choices[0].message.content // .' <<<"$response" 2>/dev/null \
        || printf '%s\n' "$response"
    } >&2
    exit 1
  fi

  # Trailing Xs keep the template valid for both GNU and BSD mktemp (a suffix
  # after the Xs is not portable); a private directory lets the file keep an
  # extension.
  WORK_DIR=$(mktemp -d "${TMPDIR:-/tmp}/speak.XXXXXX") \
    || die "Error: could not create temporary directory"

  # The file is always named *.wav, even when FORMAT requests another
  # encoding. NOTE(review): this relies on ffplay identifying the format from
  # the content - confirm for non-wav formats.
  local audio_file="$WORK_DIR/audio.wav"
  base64 -d <<<"$audio_data" >"$audio_file" \
    || die "Error: could not decode audio data"

  ffplay -nodisp -autoexit -loglevel quiet "$audio_file"
}

main "$@"