39 lines
1.4 KiB
Bash
Executable File
39 lines
1.4 KiB
Bash
Executable File
#!/bin/bash
#
# Audio Captcha Solver - Capture from BlackHole + Analyze with Gemini
#
# Records audio from the "BlackHole 2ch" avfoundation input with ffmpeg,
# posts the recording to the Gemini generateContent endpoint, and prints the
# transcription text found in the response.
#
# Usage: ./capture-and-analyze.sh [duration_seconds]
#
# Arguments:
#   duration_seconds  Recording length handed to `ffmpeg -t` (default: 5).
#
# Environment:
#   GEMINI_KEY   Gemini API key (required). GEMINI_API_KEY is accepted as a
#                fallback name. The key is never stored in this file; any key
#                that was previously committed here should be rotated.
#   CAPTURE_DIR  Directory where recordings are written
#                (default: $HOME/.clawdbot/workspace/audio-captcha/captures).
#
# Requires: ffmpeg, base64, tr, jq, curl.
#
# Exit status: 0 when a transcription was printed, 1 on any failure.

set -euo pipefail

# Print an error message to stderr and abort the script.
die() {
  printf '❌ %s\n' "$*" >&2
  exit 1
}

readonly API_URL="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
readonly PROMPT="Listen to this audio captcha and transcribe EXACTLY what is said. Return ONLY the text/numbers spoken, nothing else."

DURATION=${1:-5}
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
CAPTURE_DIR=${CAPTURE_DIR:-${HOME}/.clawdbot/workspace/audio-captcha/captures}
AUDIO_FILE="${CAPTURE_DIR}/captcha-${TIMESTAMP}.wav"
GEMINI_KEY=${GEMINI_KEY:-${GEMINI_API_KEY:-}}

[[ -n "$GEMINI_KEY" ]] \
  || die "GEMINI_KEY is not set. Export your Gemini API key before running."

# Fail early with a clear message instead of a confusing mid-run error.
for tool in ffmpeg base64 tr jq curl; do
  command -v "$tool" >/dev/null || die "Required tool not found: ${tool}"
done

mkdir -p -- "$CAPTURE_DIR" || die "Cannot create capture directory: ${CAPTURE_DIR}"

echo "🎤 Recording ${DURATION}s from BlackHole..."

# Capture audio from BlackHole 2ch as 16 kHz mono WAV.
# Only real errors are shown (-loglevel error) rather than discarding stderr,
# so a failed capture can actually be diagnosed.
if ! ffmpeg -hide_banner -loglevel error -y \
    -f avfoundation -i ":BlackHole 2ch" \
    -t "$DURATION" -ar 16000 -ac 1 "$AUDIO_FILE"; then
  die "Recording failed. Make sure BlackHole is set as output device."
fi

# ffmpeg can leave behind an empty file; treat that as a failed capture too.
[[ -s "$AUDIO_FILE" ]] \
  || die "Recording failed. Make sure BlackHole is set as output device."

echo "✅ Captured: $AUDIO_FILE"
echo "📤 Sending to Gemini for analysis..."

# Build the JSON request with jq and stream it to curl over stdin:
#  - the base64 payload never goes through argv, so long recordings cannot
#    hit the kernel's argument-size limit;
#  - newlines are stripped because some base64 implementations wrap output,
#    which would produce an invalid JSON string;
#  - jq performs the JSON escaping of the prompt and payload;
#  - the API key is passed as an x-goog-api-key header through a curl config
#    read from a file descriptor, keeping it out of the URL and out of the
#    process list.
response=$(
  base64 < "$AUDIO_FILE" \
    | tr -d '\r\n' \
    | jq -Rs --arg prompt "$PROMPT" '{
        contents: [{
          parts: [
            {text: $prompt},
            {inline_data: {mime_type: "audio/wav", data: .}}
          ]
        }]
      }' \
    | curl -sS \
        -K <(printf 'header = "x-goog-api-key: %s"\n' "$GEMINI_KEY") \
        -H "Content-Type: application/json" \
        --data-binary @- \
        "$API_URL"
) || die "Request to Gemini failed."

# jq -e exits non-zero when the selected value is null/missing, so an API
# error no longer prints the literal string "null" with a zero exit status.
# jq's own stderr is silenced here because the failure is reported below.
if ! transcript=$(jq -er '.candidates[0].content.parts[0].text' \
    <<<"$response" 2>/dev/null); then
  reason=$(jq -r '.error.message // "no transcription in response"' \
    <<<"$response" 2>/dev/null) || reason="unparseable response"
  die "Gemini returned no transcription: ${reason}"
fi

printf '%s\n' "$transcript"