clawdbot-workspace/scripts/audio-captcha-solver.sh
2026-01-28 23:00:58 -05:00

88 lines
2.8 KiB
Bash
Executable File

#!/bin/bash
# Audio Captcha Solver
# Records system audio via BlackHole and transcribes with Whisper
#
# USAGE: audio-captcha-solver.sh [DURATION]   (DURATION in seconds, default 10)
#
# SETUP REQUIRED (one-time):
# 1. Install BlackHole: brew install blackhole-2ch
# 2. Reboot (the script's own setup hint lists this as required)
# 3. Open "Audio MIDI Setup" (Spotlight → Audio MIDI Setup)
# 4. Click "+" at bottom left → "Create Multi-Output Device"
# 5. Check BOTH your speakers AND "BlackHole 2ch"
# 6. Right-click the Multi-Output Device → "Use This Device For Sound Output"
# After this, audio plays through the speakers AND is routed to BlackHole,
# where this script can record it.
# Stop at the first command that exits non-zero.
set -o errexit

# --- Settings ---------------------------------------------------------------
# Recording length in seconds: first CLI argument, 10 when omitted or empty.
DURATION=${1:-10}

# Directory that receives both the recording and its transcript.
OUTPUT_DIR=/tmp/captcha-audio
mkdir -p "${OUTPUT_DIR}"

# Second-resolution stamp shared by every file produced during this run.
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
AUDIO_FILE="${OUTPUT_DIR}/captcha_${TIMESTAMP}.wav"
# Same base name as the recording, with a .txt extension.
TRANSCRIPT_FILE="${AUDIO_FILE%.wav}.txt"
echo "🎤 Audio Captcha Solver"
echo "━━━━━━━━━━━━━━━━━━━━━━"

# Pre-flight check for the capture device.
# The recording step opens the avfoundation device named exactly
# "BlackHole 2ch", so look for that name (as a fixed string) instead of any
# line containing "BlackHole"; a looser match would let a different BlackHole
# variant pass here and only fail later inside ffmpeg.
# system_profiler's stderr is discarded on purpose: if the tool is missing or
# errors out, grep sees no match and the setup instructions below are shown.
if ! system_profiler SPAudioDataType 2>/dev/null | grep -Fq "BlackHole 2ch"; then
  echo "❌ BlackHole not detected!"
  echo ""
  echo "Setup instructions:"
  echo "1. brew install blackhole-2ch"
  echo "2. Reboot (required)"
  echo "3. Open 'Audio MIDI Setup'"
  echo "4. Create Multi-Output Device with speakers + BlackHole"
  echo "5. Set Multi-Output as system output"
  exit 1
fi

echo "✅ BlackHole detected"
echo ""
# Prompt the user; the recording starts right after these messages.
echo "▶️ Play the audio captcha NOW!"
echo "⏱️ Recording for $DURATION seconds..."
echo ""
# Record from BlackHole.
# Captures $DURATION seconds from the "BlackHole 2ch" avfoundation audio
# device and writes a 16 kHz, single-channel file to $AUDIO_FILE.
if ! command -v ffmpeg >/dev/null 2>&1; then
  echo "❌ ffmpeg not found (install with: brew install ffmpeg)"
  exit 1
fi

# Global options (-y, -loglevel) go first: ffmpeg applies options to the next
# file on the command line, and options trailing the output may be ignored.
# The explicit `if` replaces a bare `set -e` abort so the user gets a clear
# message in addition to whatever ffmpeg itself printed.
if ! ffmpeg -y -loglevel error \
    -f avfoundation -i ":BlackHole 2ch" \
    -t "$DURATION" -ar 16000 -ac 1 \
    "$AUDIO_FILE"; then
  echo "❌ Recording failed"
  exit 1
fi

# Guard against a "successful" run that left no usable file behind.
if [ ! -s "$AUDIO_FILE" ]; then
  echo "❌ Recording produced no audio: $AUDIO_FILE"
  exit 1
fi

echo "✅ Recording saved: $AUDIO_FILE"
echo ""
echo "🧠 Transcribing with Whisper..."
echo ""

# Transcribe with Whisper (using small model for speed, English).
# The transcript is written into $OUTPUT_DIR as a .txt file.
#
# Check for the tool up front: with stderr redirected, even the shell's own
# "command not found" message would otherwise be hidden.
if ! command -v whisper >/dev/null 2>&1; then
  echo "❌ whisper not found on PATH (e.g. pip install -U openai-whisper)"
  exit 1
fi

# Whisper's stderr is kept out of the normal output, but it is saved to a log
# instead of being thrown away so a failure can be explained.
WHISPER_LOG="$OUTPUT_DIR/captcha_$TIMESTAMP.whisper.log"

# Run inside `if` so that a non-zero exit is reported here rather than
# silently terminating the script through `set -e`.
if ! whisper "$AUDIO_FILE" \
    --model small \
    --language en \
    --output_format txt \
    --output_dir "$OUTPUT_DIR" \
    2>"$WHISPER_LOG"; then
  echo "❌ Transcription failed"
  # Show the end of Whisper's own diagnostics; the full log stays on disk.
  tail -n 20 "$WHISPER_LOG" >&2 || true
  echo "   Full log: $WHISPER_LOG" >&2
  exit 1
fi

# Nothing to report: drop the log so successful runs leave only audio + text.
rm -f "$WHISPER_LOG"
# Read the result.
# Prints the transcript, a letters/digits-only variant, and copies the
# transcript to the clipboard. Exits 1 when no transcript file exists or when
# it contains no text.
if [ ! -f "$TRANSCRIPT_FILE" ]; then
  echo "❌ Transcription failed"
  exit 1
fi

# Flatten the transcript to one line. Line breaks become spaces (rather than
# being deleted) so words on adjacent lines are not fused together; runs of
# spaces are then squeezed and the single leading/trailing space trimmed.
RESULT=$(tr '\n' ' ' < "$TRANSCRIPT_FILE" | tr -s ' ')
RESULT="${RESULT# }"
RESULT="${RESULT% }"

# An empty transcript is a failure, not a result: do not announce it as the
# captcha text and do not overwrite the clipboard with nothing.
if [ -z "$RESULT" ]; then
  echo "❌ Transcription was empty (no speech recognised)"
  exit 1
fi

echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "📝 CAPTCHA TEXT:"
echo ""
echo " $RESULT"
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Also extract just letters/numbers (captchas often have noise words).
# LC_ALL=C keeps the ranges strictly ASCII regardless of the user's locale.
CLEANED=$(printf '%s' "$RESULT" | LC_ALL=C tr -cd 'A-Za-z0-9' | LC_ALL=C tr 'a-z' 'A-Z')
if [ -n "$CLEANED" ]; then
  echo ""
  echo "🔤 Extracted characters: $CLEANED"
fi

# Copy to clipboard.
# printf '%s' avoids the trailing newline that echo would place on the
# clipboard. The copy is best-effort: the text has already been printed, so a
# missing or failing pbcopy should not turn a successful run into an error.
if command -v pbcopy >/dev/null 2>&1 && printf '%s' "$RESULT" | pbcopy; then
  echo ""
  echo "📋 Copied to clipboard!"
else
  echo ""
  echo "⚠️ Could not copy to clipboard (pbcopy unavailable)"
fi