129 lines
3.8 KiB
JavaScript
Executable File
129 lines
3.8 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
||
/**
 * Multi-Perspective Audio Analyzer
 * Sends the same audio clip to Gemini several times, once per analysis prompt
 *
 * Usage: node multi-analyze.js <audio-file.wav>
 *        cat audio.wav | node multi-analyze.js -
 */
|
||
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
// The API key is read from the GEMINI_API_KEY environment variable so the
// secret never lives in source control. The key that used to be hard-coded
// here must be treated as leaked and revoked.
const GEMINI_KEY = process.env.GEMINI_API_KEY ?? '';
if (GEMINI_KEY === '') {
  // Warn rather than exit: the usage text should still be reachable without a key.
  console.error('Warning: GEMINI_API_KEY is not set; Gemini requests will be rejected.');
}

// Endpoint for the gemini-2.0-flash generateContent call. The key is
// URL-encoded so unexpected characters cannot corrupt the query string.
const GEMINI_URL = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=${encodeURIComponent(GEMINI_KEY)}`;
|
||
|
||
// Analysis perspectives: each entry pairs a display name with the prompt
// that is sent to the model alongside the audio.
const PERSPECTIVES = [
  [
    'Transcription',
    'Listen to this audio and transcribe EXACTLY what is said. Return ONLY the spoken text/numbers, nothing else. Be precise.',
  ],
  [
    'Captcha-Focused',
    'This is an audio captcha. Listen carefully and identify the letters, numbers, or words being spoken. Ignore any background noise. Return ONLY the captcha answer.',
  ],
  [
    'Phonetic',
    'Listen to this audio and write out what you hear phonetically. If it sounds like letters being spelled out, list each letter. If numbers, list each digit.',
  ],
  [
    'Noise-Filtered',
    'This audio may have distortion or background noise (common in captchas). Focus on the human voice and transcribe what is being said. Ignore beeps, static, or music.',
  ],
].map(([name, prompt]) => ({ name, prompt }));
|
||
|
||
/**
 * Sends one audio clip plus one perspective prompt to the Gemini endpoint
 * and returns the model's text answer.
 *
 * This function never rejects: network failures, non-2xx HTTP responses and
 * malformed response bodies are all reported through the `result` field.
 *
 * @param {string} audioBase64 - Base64-encoded audio data.
 * @param {{name: string, prompt: string}} perspective - Prompt to send and the
 *   label to report it under.
 * @param {string} [mimeType='audio/wav'] - MIME type declared for the audio.
 * @returns {Promise<{perspective: string, result: string, elapsed: string}>}
 *   `result` is the trimmed answer, `'NO RESPONSE'` when the reply carries no
 *   text, or `'ERROR: <message>'` on failure. `elapsed` is `'<n>ms'`, or
 *   `'N/A'` on failure.
 */
async function analyzeWithPerspective(audioBase64, perspective, mimeType = 'audio/wav') {
  const startTime = Date.now();

  try {
    const response = await fetch(GEMINI_URL, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        contents: [{
          parts: [
            { text: perspective.prompt },
            { inline_data: { mime_type: mimeType, data: audioBase64 } },
          ],
        }],
      }),
    });

    if (!response.ok) {
      // Surface the server's explanation (invalid key, quota, bad payload)
      // instead of letting the failure masquerade as an empty answer.
      // NOTE(review): assumes error bodies look like { error: { message } } —
      // confirm against the Gemini API reference.
      let detail = response.statusText;
      try {
        detail = (await response.json())?.error?.message ?? detail;
      } catch {
        // Error body was not JSON; the HTTP status text is the best detail available.
      }
      throw new Error(`Gemini API request failed (HTTP ${response.status}): ${detail}`);
    }

    const data = await response.json();
    const text = data?.candidates?.[0]?.content?.parts?.[0]?.text;
    // Guard the type so a non-string payload cannot reach .trim().
    const trimmed = typeof text === 'string' ? text.trim() : '';
    const elapsed = Date.now() - startTime;

    return {
      perspective: perspective.name,
      result: trimmed === '' ? 'NO RESPONSE' : trimmed,
      elapsed: `${elapsed}ms`,
    };
  } catch (err) {
    return {
      perspective: perspective.name,
      result: `ERROR: ${err.message}`,
      elapsed: 'N/A',
    };
  }
}
|
||
|
||
/**
 * Finds the most frequent answer among the analyses that actually produced one.
 *
 * Answers are compared after lowercasing and stripping everything except
 * a-z and 0-9. Results that are failure markers (`'NO RESPONSE'` or an
 * `'ERROR:'` prefix) and answers that normalize to an empty string do not
 * vote; otherwise two failed requests would be reported as agreement.
 *
 * @param {Array<{result: string}>} results - Per-perspective results.
 * @returns {{answer: string, count: number} | null} The winning answer when at
 *   least two results agree (ties go to the answer seen first), else null.
 */
function findConsensus(results) {
  // A Map keeps arbitrary answers (e.g. "constructor") from colliding with
  // properties inherited by a plain object.
  const votes = new Map();
  for (const { result } of results) {
    if (result === 'NO RESPONSE' || result.startsWith('ERROR:')) {
      continue;
    }
    const answer = result.toLowerCase().replace(/[^a-z0-9]/g, '');
    if (answer === '') {
      continue;
    }
    votes.set(answer, (votes.get(answer) ?? 0) + 1);
  }

  let best = null;
  for (const [answer, count] of votes) {
    if (best === null || count > best.count) {
      best = { answer, count };
    }
  }
  return best !== null && best.count > 1 ? best : null;
}

/**
 * CLI entry point: loads audio from the path in `process.argv[2]` (or from
 * stdin when the argument is `-`), runs every entry of PERSPECTIVES against
 * it in parallel, prints each result, and then prints the consensus answer
 * if at least two analyses agree.
 *
 * Exits with status 1 when no argument is given, the file does not exist,
 * or the input is empty.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const audioFile = process.argv[2];

  if (!audioFile) {
    console.log('Usage: node multi-analyze.js <audio-file.wav>');
    console.log('');
    console.log('Or pipe audio: cat audio.wav | node multi-analyze.js -');
    process.exit(1);
  }

  let audioBuffer;
  if (audioFile === '-') {
    // Read the whole clip from stdin.
    const chunks = [];
    for await (const chunk of process.stdin) {
      chunks.push(chunk);
    }
    audioBuffer = Buffer.concat(chunks);
  } else {
    if (!fs.existsSync(audioFile)) {
      console.error(`File not found: ${audioFile}`);
      process.exit(1);
    }
    audioBuffer = fs.readFileSync(audioFile);
  }

  // Do not spend API calls on an empty clip.
  if (audioBuffer.length === 0) {
    console.error('No audio data to analyze (input is empty).');
    process.exit(1);
  }

  const audioBase64 = audioBuffer.toString('base64');

  console.log('🎧 Spawning', PERSPECTIVES.length, 'Gemini analyzers...\n');

  // analyzeWithPerspective reports failures in its result instead of
  // rejecting, so Promise.all will not abort on a single failed request.
  const results = await Promise.all(
    PERSPECTIVES.map((p) => analyzeWithPerspective(audioBase64, p)),
  );

  // Display results
  console.log('='.repeat(60));
  for (const r of results) {
    console.log(`\n📊 ${r.perspective} (${r.elapsed}):`);
    console.log(` "${r.result}"`);
  }
  console.log('\n' + '='.repeat(60));

  // The denominator stays the number of analyzers launched, so failures
  // remain visible in the "n/m agree" figure.
  const consensus = findConsensus(results);
  if (consensus !== null) {
    console.log(`\n🎯 CONSENSUS (${consensus.count}/${results.length} agree): "${consensus.answer}"`);
  } else {
    console.log('\n⚠️ No consensus - results vary. Check individual outputs.');
  }
}
|
||
|
||
// Run the CLI. An unexpected rejection is logged and turned into a non-zero
// exit status so shell callers and scripts can detect the failure.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|