diff --git a/memory/memories.db-wal b/memory/memories.db-wal index f3e10e806..4746166bc 100644 Binary files a/memory/memories.db-wal and b/memory/memories.db-wal differ diff --git a/tools/meeting-notes/dist/index.js b/tools/meeting-notes/dist/index.js index f9df452ea..af3060d62 100644 --- a/tools/meeting-notes/dist/index.js +++ b/tools/meeting-notes/dist/index.js @@ -27,20 +27,17 @@ function getEnvList(key) { const value = getEnv(key); return value.split(',').map(s => s.trim()).filter(Boolean); } -async function transcribeWithOnnxAsr(wavPath) { +const PYWHISPER_PYTHON = '/home/nicholai/.local/share/hyprwhspr/venv/bin/python3'; +const PYWHISPER_MODEL = 'small.en'; +async function transcribeWithWhisper(wavPath) { return new Promise((resolve) => { - const python = spawn('python3', [ + const python = spawn(PYWHISPER_PYTHON, [ '-c', ` -import sys -import os -os.environ['ORT_LOGGING_LEVEL'] = '4' -sys.path.insert(0, '/usr/lib/hyprwhspr/lib/src') -import onnx_asr -model = onnx_asr.load_model('nemo-parakeet-tdt-0.6b-v3', quantization='int8') -with open('${wavPath}', 'rb') as f: - result = model.transcribe(f.read()) -print(result if isinstance(result, str) else result.get('text', '')) +from pywhispercpp.model import Model +model = Model('${PYWHISPER_MODEL}') +segments = model.transcribe('${wavPath}') +print(' '.join(seg.text for seg in segments)) `.trim(), ]); let output = ''; @@ -53,7 +50,7 @@ print(result if isinstance(result, str) else result.get('text', '')) }); python.on('close', (code) => { if (code !== 0) { - console.error(`[TRANSCRIBE] onnx_asr failed: ${error.slice(0, 200)}`); + console.error(`[TRANSCRIBE] pywhispercpp failed (exit ${code}): ${error.slice(0, 200)}`); resolve(''); return; } @@ -240,7 +237,7 @@ class MeetingNotesBot { console.error(`[FFMPEG] Failed with code ${code}`); return; } - const text = await transcribeWithOnnxAsr(wavFile); + const text = await transcribeWithWhisper(wavFile); unlinkSync(wavFile); if (text) { const segment = {