From 8168475643f0b1006081254a54fde861c27c85e4 Mon Sep 17 00:00:00 2001 From: Abraham Date: Sat, 29 Apr 2023 16:00:49 +0500 Subject: [PATCH 1/2] Vosk added as wake-word engine option --- src-tauri/src/tauri_commands/listener.rs | 59 +++++++++++++++++++++++- src-tauri/src/vosk.rs | 9 ++-- src/pages/settings.svelte | 1 + 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/src-tauri/src/tauri_commands/listener.rs b/src-tauri/src/tauri_commands/listener.rs index cdd5f9e..c535d56 100644 --- a/src-tauri/src/tauri_commands/listener.rs +++ b/src-tauri/src/tauri_commands/listener.rs @@ -66,6 +66,13 @@ pub fn start_listening(app_handle: tauri::AppHandle) -> Result { events::play("run", &app_handle); }, |app, kidx| keyword_callback(app, kidx)); }, + "vosk" => { + info!("Starting vosk wake-word engine ..."); + return vosk_listen(&app_handle, |_app| { + // Greet user + events::play("run", &app_handle); + }, |app, kidx| keyword_callback(app, kidx)); + }, "picovoice" => { info!("Starting picovoice wake-word engine ..."); return picovoice_listen(&app_handle, |_app| { @@ -100,7 +107,7 @@ pub fn keyword_callback(app_handle: &tauri::AppHandle, _keyword_index: i32) { recorder::read_microphone(&mut frame_buffer); // vosk part (partials included) - if let Some(mut test) = vosk::recognize(&frame_buffer) { + if let Some(mut test) = vosk::recognize(&frame_buffer, false) { if !test.is_empty() { println!("Recognized: {}", test); @@ -156,6 +163,56 @@ pub fn keyword_callback(app_handle: &tauri::AppHandle, _keyword_index: i32) { } } +pub fn vosk_listen<'s, S, K>(app_handle: &tauri::AppHandle, start_callback: S, mut keyword_callback: K) -> Result + where S: Fn(&tauri::AppHandle), + K: FnMut(&tauri::AppHandle, i32) { + + // vars + let fetch_phrase = "джарвис".chars().collect::>(); + let frame_length: usize = 128; + let min_ratio: f64 = 0.8; + + // Start recording + let mut frame_buffer = vec![0; frame_length]; + recorder::FRAME_LENGTH.store(frame_length as u32, Ordering::SeqCst); + recorder::start_recording(); + LISTENING.store(true, Ordering::SeqCst); + + // run start callback + start_callback(app_handle); + + // Listen until stop flag will be true + while !STOP_LISTENING.load(Ordering::SeqCst) { + recorder::read_microphone(&mut frame_buffer); + + // recognize & convert to sequence + let recognized_phrase = vosk::recognize(&frame_buffer, true).unwrap_or("".into()); + + if !recognized_phrase.trim().is_empty() { + info!("Rec: {}", recognized_phrase); + let recognized_phrases = recognized_phrase.split_whitespace(); + for phrase in recognized_phrases { + let recognized_phrase_chars = phrase.trim().to_lowercase().chars().collect::>(); + + // compare + if seqdiff::ratio(&fetch_phrase, &recognized_phrase_chars) >= min_ratio { + info!("Phrase: {:?}", &fetch_phrase); + info!("Compare: {:?}", &recognized_phrase_chars); + keyword_callback(&app_handle, 0); + break; + } + } + } + } + + // Stop listening + recorder::stop_recording(); + LISTENING.store(false, Ordering::SeqCst); + STOP_LISTENING.store(false, Ordering::SeqCst); + + Ok(true) +} + pub fn picovoice_listen<'s, S, K>(app_handle: &tauri::AppHandle, start_callback: S, mut keyword_callback: K) -> Result where S: Fn(&tauri::AppHandle), K: FnMut(&tauri::AppHandle, i32) { diff --git a/src-tauri/src/vosk.rs b/src-tauri/src/vosk.rs index fd84224..57d08ef 100644 --- a/src-tauri/src/vosk.rs +++ b/src-tauri/src/vosk.rs @@ -18,13 +18,16 @@ pub fn init_vosk() { RECOGNIZER.lock().unwrap().set_partial_words(true); } -pub fn recognize(data: &[i16]) -> Option { +pub fn recognize(data: &[i16], include_partial: bool) -> Option { let state = RECOGNIZER.lock().unwrap().accept_waveform(data); match state { DecodingState::Running => { - None - // Some(RECOGNIZER.lock().unwrap().partial_result().partial.into()) + if include_partial { + Some(RECOGNIZER.lock().unwrap().partial_result().partial.into()) + } else { + None + } } DecodingState::Finalized => { // Result will always be multiple because we called set_max_alternatives diff --git a/src/pages/settings.svelte b/src/pages/settings.svelte index 137e2a7..2552871 100644 --- a/src/pages/settings.svelte +++ b/src/pages/settings.svelte @@ -124,6 +124,7 @@ Date: Sat, 29 Apr 2023 16:02:29 +0500 Subject: [PATCH 2/2] bruh --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bab11b9..a3779b3 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,9 @@ This are the neural networks we are currently using: - [~~Silero TTS~~](https://github.com/snakers4/silero-models) (currently not used) - [~~Coqui TTS~~](https://github.com/coqui-ai/TTS) (currently not used) - Wake Word + - [~~Rustpotter~~](https://github.com/GiviMAD/rustpotter) (WIP) - [Picovoice Porcupine](https://github.com/Picovoice/porcupine) via [official SDK](https://github.com/Picovoice/porcupine#rust) - - [~~Rustpotter~~](https://github.com/GiviMAD/rustpotter) (coming soon) + - [Vosk Speech Recognition Toolkit](https://github.com/alphacep/vosk-api) via [Vosk-rs](https://github.com/Bear-03/vosk-rs) - NLU - Nothing yet. - Chat