#!/bin/bash
#
# Audio Captcha Solver - capture from BlackHole + analyze with Gemini.
#
# Records audio from the "BlackHole 2ch" avfoundation input device with
# ffmpeg (16 kHz, mono WAV), sends the recording to the Gemini
# generateContent endpoint and prints the returned transcription on stdout.
#
# Usage: ./capture-and-analyze.sh [duration_seconds]
#   duration_seconds  Recording length in seconds (default: 5).
#
# Environment:
#   GEMINI_API_KEY       API key for the Gemini API (required). The legacy
#                        name GEMINI_KEY is accepted as a fallback.
#   CAPTCHA_CAPTURE_DIR  Directory for recordings (default:
#                        $HOME/.clawdbot/workspace/audio-captcha/captures).
#
# Requires: ffmpeg, base64, tr, jq, curl.
# Exit status: 0 on success, 1 on any failure (diagnostics go to stderr).

set -euo pipefail

readonly DEFAULT_DURATION=5
readonly INPUT_DEVICE=':BlackHole 2ch'
readonly API_URL='https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent'
readonly PROMPT='Listen to this audio captcha and transcribe EXACTLY what is said. Return ONLY the text/numbers spoken, nothing else.'

# Scratch directory for the ffmpeg log and the request body; set in main and
# removed by cleanup on every exit path.
WORK_DIR=''

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory, if one was created.
cleanup() {
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}

# Abort early with a clear message when an external tool is missing.
require_tools() {
  local tool
  for tool in ffmpeg base64 tr jq curl; do
    command -v "$tool" >/dev/null 2>&1 \
      || die "❌ Missing required tool: ${tool}"
  done
}

#######################################
# Record from the BlackHole device into a WAV file.
# Arguments: $1 - duration in seconds
#            $2 - output WAV path
#            $3 - path for ffmpeg's stderr log
# Returns:   0 when ffmpeg succeeded and the file is non-empty.
#######################################
record_audio() {
  local duration=$1 out_file=$2 log_file=$3

  # -nostdin keeps ffmpeg from consuming the script's stdin. stderr is kept
  # in a log (instead of /dev/null) so the failure cause can be shown.
  if ! ffmpeg -nostdin -loglevel error -y -f avfoundation \
      -i "$INPUT_DEVICE" -t "$duration" -ar 16000 -ac 1 \
      "$out_file" 2>"$log_file" || [[ ! -s "$out_file" ]]; then
    cat -- "$log_file" >&2
    return 1
  fi
}

#######################################
# Write the generateContent JSON request body for a WAV file.
# Arguments: $1 - input WAV path
#            $2 - output JSON path
#######################################
build_payload() {
  local audio_file=$1 payload_file=$2

  # base64 implementations differ on line wrapping, so strip newlines; jq
  # does the JSON quoting. The body goes to a file because a base64 blob
  # passed on the command line can exceed the argument-size limit.
  base64 <"$audio_file" \
    | tr -d '\r\n' \
    | jq -Rs --arg prompt "$PROMPT" '{
        contents: [{
          parts: [
            {text: $prompt},
            {inline_data: {mime_type: "audio/wav", data: .}}
          ]
        }]
      }' >"$payload_file"
}

#######################################
# POST the request body to Gemini and print the raw JSON response.
# Arguments: $1 - JSON request body path
#            $2 - API key
#######################################
request_transcription() {
  local payload_file=$1 api_key=$2

  # The key is supplied through a curl config read from stdin so it never
  # appears in the process argument list or in the request URL.
  curl -sS --config - \
    -H 'Content-Type: application/json' \
    --data-binary "@${payload_file}" \
    "$API_URL" <<<"header = \"x-goog-api-key: ${api_key}\""
}

main() {
  local duration=${1:-$DEFAULT_DURATION}
  local number_re='^[0-9]+([.][0-9]+)?$'
  local api_key=${GEMINI_API_KEY:-${GEMINI_KEY:-}}
  local capture_dir=${CAPTCHA_CAPTURE_DIR:-${HOME}/.clawdbot/workspace/audio-captcha/captures}
  local timestamp audio_file response transcript api_error

  require_tools
  [[ "$duration" =~ $number_re ]] \
    || die "Usage: ${0##*/} [duration_seconds]  (got: '${duration}')"
  # Checked before recording so a missing key fails fast.
  [[ -n "$api_key" ]] \
    || die "❌ GEMINI_API_KEY is not set. Export it before running."

  mkdir -p -- "$capture_dir" || die "❌ Cannot create ${capture_dir}"
  timestamp=$(date +%Y%m%d-%H%M%S)
  audio_file="${capture_dir}/captcha-${timestamp}.wav"

  WORK_DIR=$(mktemp -d) || die "❌ mktemp failed"
  trap cleanup EXIT

  echo "🎤 Recording ${duration}s from BlackHole..."
  record_audio "$duration" "$audio_file" "${WORK_DIR}/ffmpeg.log" \
    || die "❌ Recording failed. Make sure BlackHole is set as output device."
  echo "✅ Captured: $audio_file"

  echo "📤 Sending to Gemini for analysis..."
  build_payload "$audio_file" "${WORK_DIR}/request.json" \
    || die "❌ Could not build the request body."
  response=$(request_transcription "${WORK_DIR}/request.json" "$api_key") \
    || die "❌ Request to Gemini failed."

  # jq -e exits non-zero when the path is missing/null, which is what an
  # API error response looks like; report the API's message in that case.
  if ! transcript=$(jq -er '.candidates[0].content.parts[0].text' \
      <<<"$response" 2>/dev/null); then
    api_error=$(jq -r '.error.message // empty' <<<"$response" 2>/dev/null \
      || true)
    die "❌ Gemini returned no transcription. ${api_error}"
  fi

  printf '%s\n' "$transcript"
}

main "$@"