2026-01-28 23:00:58 -05:00

129 lines
3.8 KiB
JavaScript
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
 * Multi-Perspective Audio Analyzer
 * Spawns multiple Gemini instances to analyze audio from different angles
 *
 * Usage: node multi-analyze.js <audio-file.wav>
 *
 * Requires the GEMINI_API_KEY environment variable to be set.
 */
const fs = require('fs');
const path = require('path');

// SECURITY: never hard-code an API key in source — a key committed to a repo
// is a leaked credential and must be rotated. Read it from the environment.
const GEMINI_KEY = process.env.GEMINI_API_KEY ?? '';
if (!GEMINI_KEY) {
  console.error('Warning: GEMINI_API_KEY is not set; API requests will fail.');
}
const GEMINI_URL = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=${GEMINI_KEY}`;
// Prompt variants sent to Gemini in parallel. Each phrases the same
// transcription task from a different angle so that agreement between the
// answers can be used as a confidence signal (see the consensus tally in
// main()).
const PERSPECTIVES = [
{
// Plain verbatim transcription of the spoken content.
name: 'Transcription',
prompt: 'Listen to this audio and transcribe EXACTLY what is said. Return ONLY the spoken text/numbers, nothing else. Be precise.'
},
{
// Frames the task explicitly as an audio captcha.
name: 'Captcha-Focused',
prompt: 'This is an audio captcha. Listen carefully and identify the letters, numbers, or words being spoken. Ignore any background noise. Return ONLY the captcha answer.'
},
{
// Asks for a phonetic, character-by-character breakdown.
name: 'Phonetic',
prompt: 'Listen to this audio and write out what you hear phonetically. If it sounds like letters being spelled out, list each letter. If numbers, list each digit.'
},
{
// Directs the model to ignore deliberate distortion/noise.
name: 'Noise-Filtered',
prompt: 'This audio may have distortion or background noise (common in captchas). Focus on the human voice and transcribe what is being said. Ignore beeps, static, or music.'
}
];
/**
 * Send the audio to Gemini once, using a single perspective's prompt.
 *
 * @param {string} audioBase64 - WAV audio encoded as base64.
 * @param {{name: string, prompt: string}} perspective - Prompt variant to use.
 * @returns {Promise<{perspective: string, result: string, elapsed: string}>}
 *   Never rejects: any failure is folded into `result` as an `ERROR: ...`
 *   string so Promise.all over all perspectives cannot fail fast.
 */
async function analyzeWithPerspective(audioBase64, perspective) {
  const startTime = Date.now();
  try {
    const response = await fetch(GEMINI_URL, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        contents: [{
          parts: [
            { text: perspective.prompt },
            { inline_data: { mime_type: 'audio/wav', data: audioBase64 } }
          ]
        }]
      })
    });
    // fetch() resolves even for 4xx/5xx responses; surface HTTP failures
    // explicitly instead of reporting a misleading 'NO RESPONSE'.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    const data = await response.json();
    // The API reports quota/validation problems in a top-level `error` field.
    if (data.error) {
      throw new Error(data.error.message || 'API error');
    }
    const result = data.candidates?.[0]?.content?.parts?.[0]?.text || 'NO RESPONSE';
    const elapsed = Date.now() - startTime;
    return {
      perspective: perspective.name,
      result: result.trim(),
      elapsed: `${elapsed}ms`
    };
  } catch (err) {
    return {
      perspective: perspective.name,
      result: `ERROR: ${err.message}`,
      elapsed: 'N/A'
    };
  }
}
/**
 * CLI entry point: load WAV bytes from the file argument (or stdin when the
 * argument is '-'), fan the base64 payload out to every perspective in
 * parallel, print each result, then report whether a majority of the
 * normalized answers agree.
 */
async function main() {
  const inputPath = process.argv[2];
  if (!inputPath) {
    console.log('Usage: node multi-analyze.js <audio-file.wav>');
    console.log('');
    console.log('Or pipe audio: cat audio.wav | node multi-analyze.js -');
    process.exit(1);
  }

  // '-' means "read the audio from stdin" rather than from a file.
  let audioBuffer;
  if (inputPath === '-') {
    const pieces = [];
    for await (const piece of process.stdin) {
      pieces.push(piece);
    }
    audioBuffer = Buffer.concat(pieces);
  } else {
    if (!fs.existsSync(inputPath)) {
      console.error(`File not found: ${inputPath}`);
      process.exit(1);
    }
    audioBuffer = fs.readFileSync(inputPath);
  }

  const encoded = audioBuffer.toString('base64');
  console.log('🎧 Spawning', PERSPECTIVES.length, 'Gemini analyzers...\n');

  // Every perspective queries the API concurrently.
  const results = await Promise.all(
    PERSPECTIVES.map((p) => analyzeWithPerspective(encoded, p))
  );

  const divider = '='.repeat(60);
  console.log(divider);
  results.forEach((r) => {
    console.log(`\n📊 ${r.perspective} (${r.elapsed}):`);
    console.log(` "${r.result}"`);
  });
  console.log('\n' + divider);

  // Normalize each answer to lowercase alphanumerics and tally; any value
  // produced by more than one perspective counts as consensus.
  const tally = new Map();
  for (const r of results) {
    const key = r.result.toLowerCase().replace(/[^a-z0-9]/g, '');
    tally.set(key, (tally.get(key) ?? 0) + 1);
  }
  const [top] = [...tally.entries()].sort((a, b) => b[1] - a[1]);
  if (top !== undefined && top[1] > 1) {
    console.log(`\n🎯 CONSENSUS (${top[1]}/${results.length} agree): "${top[0]}"`);
  } else {
    console.log('\n⚠ No consensus - results vary. Check individual outputs.');
  }
}
main().catch(console.error);