#!/bin/bash
# Audio Captcha Solver - Agent Browser Edition
# Uses agent-browser for network interception instead of BlackHole
#
# Usage:
#   ./captcha_agent_browser.sh <url> [mode] [target]
#   ./captcha_agent_browser.sh "https://site.com/login" transcribe
#   ./captcha_agent_browser.sh "https://site.com/login" identify "stream"
#   ./captcha_agent_browser.sh "https://site.com/login" describe

# Abort on the first unhandled command failure. `pipefail` is deliberately not
# enabled: the script contains `grep | head` pipelines whose grep stage may
# legitimately match nothing.
set -e

# Positional arguments:
#   $1  URL of the page that hosts the captcha (required)
#   $2  analysis mode: transcribe (default), identify, or describe
#   $3  sound to look for; only used by identify mode
URL="${1:-}"
MODE="${2:-transcribe}"
TARGET="${3:-}"

# Print the command-line help on stderr.
usage() {
  {
    echo "Usage: $0 <url> [mode] [target]"
    echo ""
    echo "Modes:"
    echo " transcribe - Speech-to-text (default)"
    echo " identify - Which sound is X? (requires target)"
    echo " describe - List all sounds heard"
    echo ""
    echo "Example:"
    echo " $0 'https://example.com/login' identify 'stream'"
  } >&2
}

if [ -z "$URL" ]; then
  usage
  exit 1
fi

# Reject a bad mode or a missing target now, before a browser is opened and
# the user is asked to interact with the page.
case "$MODE" in
  transcribe | describe) ;;
  identify)
    if [ -z "$TARGET" ]; then
      echo "❌ identify mode requires a target sound" >&2
      echo " Example: $0 '$URL' identify 'stream'" >&2
      exit 1
    fi
    ;;
  *)
    echo "❌ Unknown mode: $MODE" >&2
    echo " Use: transcribe, identify, or describe" >&2
    exit 1
    ;;
esac

# Fail fast when a tool this run depends on is missing: transcribe calls
# whisper, the other modes call gemini, and every mode needs agent-browser
# and curl.
REQUIRED_TOOLS=(agent-browser curl)
if [ "$MODE" = "transcribe" ]; then
  REQUIRED_TOOLS+=(whisper)
else
  REQUIRED_TOOLS+=(gemini)
fi
for TOOL in "${REQUIRED_TOOLS[@]}"; do
  if ! command -v "$TOOL" >/dev/null 2>&1; then
    echo "❌ Required command not found: $TOOL" >&2
    exit 1
  fi
done

# Directory that receives the downloaded captcha audio.
OUTPUT_DIR="/tmp/captcha-audio"

# Second-resolution timestamp used to give each run its own file name.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# The capture is always stored under an .mp3 name, whatever format the
# detected URL points at; the transcript path is later derived by swapping
# this .mp3 suffix for .txt.
AUDIO_FILE="$OUTPUT_DIR/captcha_$TIMESTAMP.mp3"

mkdir -p "$OUTPUT_DIR"

echo "🌐 Audio Captcha Solver (Agent Browser)"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Open URL in agent-browser. The user has to click the captcha's audio button
# themselves, so the page is opened with --headed.
echo "📍 Opening: $URL"
if ! agent-browser open "$URL" --headed; then
  echo "❌ Could not open the page with agent-browser" >&2
  exit 1
fi

echo ""
echo "👆 Find and click the AUDIO CAPTCHA button on the page"
echo " (I'm watching network requests for audio files...)"
echo ""

# Wait a moment for page load
sleep 2

# Poll for audio requests: ask agent-browser for its recorded network requests
# once per second until one of them looks like captcha audio or the attempt
# budget is used up.
echo "👂 Listening for audio file requests..."
MAX_ATTEMPTS=60
ATTEMPT=0
AUDIO_URL=""

# Print the first URL found in $1 that looks like captcha audio; print nothing
# when there is none. Always returns 0.
#
# Patterns are tried from most to least specific. Trying the broad "any audio
# extension" and "contains audio" patterns first would make the captcha-specific
# ones unreachable, because every URL they match is also matched by a broad one.
# NOTE(review): URLs are matched as raw text up to the next double quote; if
# agent-browser escapes "/" as "\/" in its JSON these patterns will not match —
# confirm against real output.
find_audio_url() {
  local requests=$1
  local pattern match
  for pattern in \
    'https?://[^"]*recaptcha[^"]*audio[^"]*' \
    'https?://[^"]*captcha[^"]*\.(mp3|wav|ogg)[^"]*' \
    'https?://[^"]+\.(mp3|wav|ogg|m4a|webm)[^"]*' \
    'https?://[^"]*audio[^"]*'; do
    # grep finding nothing is the normal "not yet" case, not an error.
    match=$(grep -oE "$pattern" <<<"$requests" | head -1 || true)
    if [ -n "$match" ]; then
      printf '%s\n' "$match"
      return 0
    fi
  done
  return 0
}

while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
  # A failed query is treated as "no requests recorded yet" so that polling
  # simply continues.
  REQUESTS=$(agent-browser network requests --json 2>/dev/null || echo "[]")

  AUDIO_URL=$(find_audio_url "$REQUESTS")
  if [ -n "$AUDIO_URL" ]; then
    echo "🎵 Found audio URL!"
    break
  fi

  sleep 1
  ATTEMPT=$((ATTEMPT + 1))

  # Show progress every 10 seconds
  if [ $((ATTEMPT % 10)) -eq 0 ]; then
    echo " Still listening... ($ATTEMPT seconds)"
  fi
done

if [ -z "$AUDIO_URL" ]; then
  {
    echo "❌ No audio file detected after ${MAX_ATTEMPTS} seconds"
    echo ""
    echo "Debugging: Here are recent network requests:"
    # Best effort only: a failure here must not mask the exit status below.
    agent-browser network requests 2>/dev/null | head -20 || true
  } >&2
  exit 1
fi

# Download the detected audio URL to $AUDIO_FILE.
echo "📥 Downloading: $AUDIO_URL"

# --fail turns HTTP error responses into a non-zero exit instead of saving the
# error page as if it were audio; -S keeps curl's own error message visible
# despite -s. The status is tested explicitly so that a failure prints the
# message below rather than ending the script silently under `set -e`.
if ! curl -fsSL "$AUDIO_URL" -o "$AUDIO_FILE" || [ ! -s "$AUDIO_FILE" ]; then
  echo "❌ Failed to download audio file" >&2
  exit 1
fi

echo "✅ Saved to: $AUDIO_FILE"
echo ""

# Analyze based on mode: transcribe runs Whisper on the downloaded audio, while
# identify and describe send the audio to Gemini with a mode-specific prompt.

RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Copy $1 to the clipboard with pbcopy, without a trailing newline so that
# pasting into a form field inserts only the answer. A missing or failing
# pbcopy produces a warning instead of aborting the script.
copy_to_clipboard() {
  if command -v pbcopy >/dev/null 2>&1 && printf '%s' "$1" | pbcopy; then
    echo "📋 Copied to clipboard!"
  else
    echo "⚠️ Could not copy to clipboard (pbcopy unavailable)" >&2
  fi
}

case "$MODE" in
  transcribe)
    echo "🧠 Transcribing with Whisper..."
    # The exit status is tested explicitly: with `set -e` and stderr discarded,
    # a Whisper failure would otherwise end the script without any message.
    if ! whisper "$AUDIO_FILE" \
      --model small \
      --language en \
      --output_format txt \
      --output_dir "$OUTPUT_DIR" \
      2>/dev/null; then
      echo "❌ Transcription failed" >&2
      exit 1
    fi

    # The transcript is expected next to the audio file, with .txt in place
    # of .mp3.
    TXT_FILE="${AUDIO_FILE%.mp3}.txt"
    if [ ! -f "$TXT_FILE" ]; then
      echo "❌ Transcription failed" >&2
      exit 1
    fi

    RAW_TEXT=$(cat "$TXT_FILE")
    # Keep only letters and digits, joined together and upper-cased.
    EXTRACTED=$(printf '%s\n' "$RAW_TEXT" \
      | grep -oE '[A-Za-z0-9]' \
      | tr -d '\n' \
      | tr '[:lower:]' '[:upper:]' || true)

    echo ""
    echo "$RULE"
    echo "📝 Raw text: $RAW_TEXT"
    echo "🔤 Extracted: $EXTRACTED"
    echo "$RULE"

    copy_to_clipboard "$EXTRACTED"
    ;;

  identify)
    if [ -z "$TARGET" ]; then
      echo "❌ identify mode requires a target sound" >&2
      echo " Example: $0 '$URL' identify 'stream'" >&2
      exit 1
    fi

    echo "🧠 Asking Gemini: which sound is '$TARGET'?"

    PROMPT="Listen to this audio captcha. It contains multiple sounds. Which sound is a \"$TARGET\"? Reply with ONLY the number (1, 2, 3, etc.) of the matching sound. Just the number, nothing else."

    if ! RESPONSE=$(gemini -p "$PROMPT" -f "$AUDIO_FILE" 2>/dev/null); then
      echo "❌ Gemini request failed" >&2
      exit 1
    fi
    # First run of digits in the reply; when there is none, the full reply is
    # shown and copied instead.
    ANSWER=$(printf '%s\n' "$RESPONSE" | grep -oE '[0-9]+' | head -1 || true)

    echo ""
    echo "$RULE"
    echo "🎯 Target: $TARGET"
    echo "✅ Answer: ${ANSWER:-$RESPONSE}"
    echo "$RULE"

    copy_to_clipboard "${ANSWER:-$RESPONSE}"
    ;;

  describe)
    echo "🧠 Asking Gemini to describe all sounds..."

    PROMPT="Listen to this audio and describe each distinct sound you hear. Format as: 1: [description], 2: [description], etc."

    if ! RESPONSE=$(gemini -p "$PROMPT" -f "$AUDIO_FILE" 2>/dev/null); then
      echo "❌ Gemini request failed" >&2
      exit 1
    fi

    echo ""
    echo "$RULE"
    echo "🔊 Sounds detected:"
    echo "$RESPONSE"
    echo "$RULE"
    ;;

  *)
    echo "❌ Unknown mode: $MODE" >&2
    echo " Use: transcribe, identify, or describe" >&2
    exit 1
    ;;
esac

echo ""
echo "Done! Browser is still open if you need to enter the answer."