clawdbot-workspace/audio-captcha/capture-and-analyze.sh

#!/bin/bash
# Audio Captcha Solver - Capture from BlackHole + Analyze with Gemini
#
# Usage: ./capture-and-analyze.sh [duration_seconds]
#
# Arguments:
#   duration_seconds  Recording length handed to ffmpeg's -t option (default: 5).
#
# Environment:
#   GEMINI_KEY        Gemini API key (required). GEMINI_API_KEY is accepted as a
#                     fallback when GEMINI_KEY is unset or empty.
#   CAPTURE_DIR       Directory that receives the recorded WAV files
#                     (default: /Users/jakeshore/.clawdbot/workspace/audio-captcha/captures).

# Treat unset variables as errors and let a pipeline report the failure of any
# stage. `-e` is deliberately not enabled: failures that matter are checked
# explicitly so the user gets a specific message instead of a silent exit.
set -uo pipefail

DURATION=${1:-5}
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
CAPTURE_DIR=${CAPTURE_DIR:-/Users/jakeshore/.clawdbot/workspace/audio-captcha/captures}
AUDIO_FILE="${CAPTURE_DIR}/captcha-${TIMESTAMP}.wav"

# The API key is a secret and must come from the environment, never from this
# file. NOTE: a key was previously committed here in plain text; treat that key
# as compromised and revoke it.
GEMINI_KEY=${GEMINI_KEY:-${GEMINI_API_KEY:-}}
if [[ -z "$GEMINI_KEY" ]]; then
    echo "❌ GEMINI_KEY is not set. Export your Gemini API key and re-run." >&2
    exit 1
fi

if ! mkdir -p -- "$CAPTURE_DIR"; then
    echo "❌ Cannot create capture directory: $CAPTURE_DIR" >&2
    exit 1
fi

echo "🎤 Recording ${DURATION}s from BlackHole..."

# A missing ffmpeg would otherwise be reported as a BlackHole problem below.
if ! command -v ffmpeg >/dev/null 2>&1; then
    echo "❌ ffmpeg not found in PATH." >&2
    exit 1
fi

# Capture audio from BlackHole 2ch (avfoundation input ":<name>" = audio device
# only, no video) for $DURATION, resampled to 16 kHz mono.
# ffmpeg's banner and progress output are silenced, but real errors still reach
# stderr so a failure can be diagnosed. The recording counts as successful only
# if ffmpeg exits 0 AND the output file is non-empty; an aborted run can leave
# a zero-byte file behind.
if ! ffmpeg -y -hide_banner -loglevel error \
        -f avfoundation -i ":BlackHole 2ch" \
        -t "$DURATION" -ar 16000 -ac 1 "$AUDIO_FILE" \
    || [[ ! -s "$AUDIO_FILE" ]]; then
    echo "❌ Recording failed. Make sure BlackHole is set as output device." >&2
    exit 1
fi

echo "✅ Captured: $AUDIO_FILE"
echo "📤 Sending to Gemini for analysis..."

for REQUIRED_TOOL in base64 jq curl; do
    if ! command -v "$REQUIRED_TOOL" >/dev/null 2>&1; then
        echo "❌ Required tool not found: $REQUIRED_TOOL" >&2
        exit 1
    fi
done

GEMINI_URL="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
PROMPT="Listen to this audio captcha and transcribe EXACTLY what is said. Return ONLY the text/numbers spoken, nothing else."

# Build the request and call Gemini as one stream:
#   * base64 reads the file on stdin and any line wrapping is stripped, so the
#     encoded audio is a single line regardless of the base64 implementation.
#   * jq (-R raw input, -s slurp) takes that whole line as one string and emits
#     correctly escaped JSON, instead of splicing text into a hand-written body.
#   * curl reads the body from stdin (--data-binary @-), so the payload never
#     goes through the command line and cannot hit the argument-size limit.
#   * The API key travels in the x-goog-api-key header rather than the URL, so
#     it does not end up in URL logs.
RESPONSE=$(
    base64 < "$AUDIO_FILE" | tr -d '\n' \
        | jq -c -Rs --arg prompt "$PROMPT" '{
            contents: [{
              parts: [
                {text: $prompt},
                {inline_data: {mime_type: "audio/wav", data: .}}
              ]
            }]
          }' \
        | curl -sS "$GEMINI_URL" \
            -H "Content-Type: application/json" \
            -H "x-goog-api-key: ${GEMINI_KEY}" \
            --data-binary @-
) || {
    echo "❌ Request to Gemini failed." >&2
    exit 1
}

# jq -e exits non-zero when the selected value is null/false or the response is
# not valid JSON; this turns an API error into a real failure instead of
# printing "null" and exiting 0.
if ! jq -e '.candidates[0].content.parts[0].text' <<<"$RESPONSE" >/dev/null 2>&1; then
    API_ERROR=$(jq -r '.error.message // empty' <<<"$RESPONSE" 2>/dev/null)
    echo "❌ Gemini returned no transcription. ${API_ERROR:-Unexpected response.}" >&2
    exit 1
fi

# Print only the transcription on stdout.
jq -r '.candidates[0].content.parts[0].text' <<<"$RESPONSE"