#!/usr/bin/env node
/**
 * Multi-Perspective Audio Analyzer
 *
 * Sends the same audio clip to Gemini several times in parallel, each time
 * with a different prompt, prints every answer, and reports whether at least
 * two of the answers agree.
 *
 * Usage:
 *   GEMINI_API_KEY=... node multi-analyze.js <audio-file>
 *   cat audio.wav | GEMINI_API_KEY=... node multi-analyze.js -
 */

const fs = require('fs');
const path = require('path');

// The API key is read from the environment so it never lives in source control.
const GEMINI_KEY = process.env.GEMINI_API_KEY ?? '';
const GEMINI_URL =
  'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent';

// Upper bound for a single request; without it one stalled connection would
// keep Promise.all (and therefore the whole CLI) waiting forever.
const REQUEST_TIMEOUT_MS = 60000;

// Different analysis perspectives
const PERSPECTIVES = [
  {
    name: 'Transcription',
    prompt: 'Listen to this audio and transcribe EXACTLY what is said. Return ONLY the spoken text/numbers, nothing else. Be precise.',
  },
  {
    name: 'Captcha-Focused',
    prompt: 'This is an audio captcha. Listen carefully and identify the letters, numbers, or words being spoken. Ignore any background noise. Return ONLY the captcha answer.',
  },
  {
    name: 'Phonetic',
    prompt: 'Listen to this audio and write out what you hear phonetically. If it sounds like letters being spelled out, list each letter. If numbers, list each digit.',
  },
  {
    name: 'Noise-Filtered',
    prompt: 'This audio may have distortion or background noise (common in captchas). Focus on the human voice and transcribe what is being said. Ignore beeps, static, or music.',
  },
];

/**
 * Extracts a human-readable error description from a non-2xx response.
 *
 * @param {Response} response - The failed fetch response (body not yet read).
 * @returns {Promise<string>} The `error.message` field of a JSON body when
 *   present, otherwise the raw body, otherwise the HTTP status text.
 */
async function readErrorDetail(response) {
  const body = await response.text();
  try {
    return JSON.parse(body)?.error?.message ?? body;
  } catch {
    // Body was empty or not JSON; fall back to whatever text we have.
    return body || response.statusText;
  }
}

/**
 * Runs one prompt ("perspective") against the audio clip.
 *
 * Never rejects: every failure is folded into the returned object so that a
 * single bad request cannot abort the other parallel requests.
 *
 * @param {string} audioBase64 - Base64-encoded audio, sent as `audio/wav`.
 * @param {{name: string, prompt: string}} perspective - Prompt to run.
 * @returns {Promise<{perspective: string, result: string, elapsed: string, ok: boolean}>}
 *   `result` is the trimmed model text, `'NO RESPONSE'` when the reply held no
 *   text, or `'ERROR: <message>'` on failure. `ok` is true only for real text.
 */
async function analyzeWithPerspective(audioBase64, perspective) {
  const startTime = Date.now();

  try {
    const response = await fetch(GEMINI_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Header instead of `?key=` so the secret stays out of URLs and logs.
        'x-goog-api-key': GEMINI_KEY,
      },
      body: JSON.stringify({
        contents: [{
          parts: [
            { text: perspective.prompt },
            { inline_data: { mime_type: 'audio/wav', data: audioBase64 } },
          ],
        }],
      }),
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
    });

    if (!response.ok) {
      // Surface the API's own explanation instead of a bare "NO RESPONSE".
      const detail = await readErrorDetail(response);
      throw new Error(`HTTP ${response.status}: ${detail}`);
    }

    const data = await response.json();
    const text = data?.candidates?.[0]?.content?.parts?.[0]?.text;
    const elapsed = `${Date.now() - startTime}ms`;

    if (typeof text !== 'string' || text.trim() === '') {
      return { perspective: perspective.name, result: 'NO RESPONSE', elapsed, ok: false };
    }

    return { perspective: perspective.name, result: text.trim(), elapsed, ok: true };
  } catch (err) {
    return {
      perspective: perspective.name,
      result: `ERROR: ${err.message}`,
      elapsed: 'N/A',
      ok: false,
    };
  }
}

/**
 * Finds the most common answer among the successful results.
 *
 * Answers are compared after lower-casing and stripping everything except
 * `a-z0-9`. Failed results and answers that normalize to an empty string are
 * ignored so that two identical failures are never reported as agreement.
 *
 * @param {Array<{result: string, ok: boolean}>} results - Analyzer outputs.
 * @returns {{answer: string, votes: number} | null} The winning normalized
 *   answer when at least two results agree, otherwise `null`. Ties go to the
 *   answer that appeared first.
 */
function findConsensus(results) {
  // Map rather than a plain object: keys come from model output, and a key
  // such as "constructor" would collide with Object.prototype.
  const votesByAnswer = new Map();

  for (const { result, ok } of results) {
    if (!ok) continue;
    const normalized = result.toLowerCase().replace(/[^a-z0-9]/g, '');
    if (normalized === '') continue;
    votesByAnswer.set(normalized, (votesByAnswer.get(normalized) ?? 0) + 1);
  }

  let best = null;
  for (const [answer, votes] of votesByAnswer) {
    if (best === null || votes > best.votes) {
      best = { answer, votes };
    }
  }

  return best !== null && best.votes > 1 ? best : null;
}

/**
 * Reads the whole of stdin into a single buffer.
 *
 * @returns {Promise<Buffer>} Everything received on stdin.
 */
async function readStdin() {
  const chunks = [];
  for await (const chunk of process.stdin) {
    chunks.push(chunk);
  }
  return Buffer.concat(chunks);
}

/**
 * CLI entry point: loads the audio named by the first argument (`-` means
 * stdin), runs every perspective in parallel, prints each result and the
 * consensus. Exits with status 1 on bad usage, a missing API key, a missing
 * file, or empty input.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const audioFile = process.argv[2];

  if (!audioFile) {
    console.log('Usage: node multi-analyze.js <audio-file>');
    console.log('');
    console.log('Or pipe audio: cat audio.wav | node multi-analyze.js -');
    process.exit(1);
  }

  if (!GEMINI_KEY) {
    console.error('Missing API key: set the GEMINI_API_KEY environment variable.');
    process.exit(1);
  }

  let audioBuffer;
  if (audioFile === '-') {
    audioBuffer = await readStdin();
  } else {
    if (!fs.existsSync(audioFile)) {
      console.error(`File not found: ${audioFile}`);
      process.exit(1);
    }
    audioBuffer = fs.readFileSync(audioFile);
  }

  if (audioBuffer.length === 0) {
    // Nothing to analyze; avoid spending API calls on an empty payload.
    console.error('No audio data received.');
    process.exit(1);
  }

  const audioBase64 = audioBuffer.toString('base64');

  console.log('🎧 Spawning', PERSPECTIVES.length, 'Gemini analyzers...\n');

  // Run all perspectives in parallel. analyzeWithPerspective never rejects,
  // so Promise.all cannot fail fast here.
  const results = await Promise.all(
    PERSPECTIVES.map((p) => analyzeWithPerspective(audioBase64, p)),
  );

  // Display results
  console.log('='.repeat(60));
  for (const r of results) {
    console.log(`\n📊 ${r.perspective} (${r.elapsed}):`);
    console.log(` "${r.result}"`);
  }
  console.log('\n' + '='.repeat(60));

  // Find consensus
  const consensus = findConsensus(results);
  if (consensus !== null) {
    console.log(`\n🎯 CONSENSUS (${consensus.votes}/${results.length} agree): "${consensus.answer}"`);
  } else {
    console.log('\n⚠️ No consensus - results vary. Check individual outputs.');
  }
}

main().catch((err) => {
  console.error(err);
  // Make unexpected failures visible to shells and calling scripts.
  process.exitCode = 1;
});