From e2370dc046e04b0e67453c4184cd4a06283ee72f Mon Sep 17 00:00:00 2001
From: Priler <priler96@gmail.com>
Date: Thu, 8 Jan 2026 00:35:21 +0500
Subject: [PATCH] VAD fixes + some calibrations

---
 crates/jarvis-app/src/_app.rs              | 337 +++++++++++++++++++
 crates/jarvis-app/src/app.rs               | 360 ++++++++++++---------
 crates/jarvis-core/src/audio_buffer.rs     |  41 +++
 crates/jarvis-core/src/config.rs           |   4 +-
 crates/jarvis-core/src/i18n/locales/en.ftl |   2 +-
 crates/jarvis-core/src/i18n/locales/ru.ftl |   2 +-
 crates/jarvis-core/src/i18n/locales/ua.ftl |   2 +-
 crates/jarvis-core/src/lib.rs              |   2 +
 8 files changed, 593 insertions(+), 157 deletions(-)
 create mode 100644 crates/jarvis-app/src/_app.rs
 create mode 100644 crates/jarvis-core/src/audio_buffer.rs
diff --git a/crates/jarvis-app/src/_app.rs b/crates/jarvis-app/src/_app.rs
new file mode 100644
index 0000000..e29e8d1
--- /dev/null
+++ b/crates/jarvis-app/src/_app.rs
@@ -0,0 +1,355 @@
+//! NOTE(review): this file repeats the single-loop wake-word flow that the same
+//! patch removes from `app.rs`, with extra DEBUG logging on top. No `mod _app;`
+//! declaration is visible in the patch - confirm it is meant to be committed.
+
+use std::sync::mpsc::Receiver;
+use std::time::SystemTime;
+
+use jarvis_core::{audio_buffer::AudioRingBuffer, audio, audio_processing, commands, config,  listener, recorder, stt, COMMANDS_LIST, intent, voices, ipc::{self, IpcEvent}};
+use rand::prelude::*;
+
+use crate::should_stop;
+
+// VAD state machine
+#[derive(Debug, Clone, Copy, PartialEq)]
+enum VadState {
+    WaitingForVoice,
+    VoiceActive,
+}
+
+/// Starts the assistant: delegates to [`main_loop`] and returns its result.
+pub fn start(text_cmd_rx: Receiver<String>) -> Result<(), ()> {
+    // start the loop
+    main_loop(text_cmd_rx)
+}
+
+/// Wake-word loop. Reads microphone frames until a stop is requested; frames
+/// that pass the VAD go to the wake-word detector, and after a detection the
+/// inner loop feeds frames to speech recognition and executes the command.
+/// Text commands received on `text_cmd_rx` are executed between frames.
+///
+/// Returns `Err(())` if recording cannot be started, `Ok(())` after shutdown.
+fn main_loop(text_cmd_rx: Receiver<String>) -> Result<(), ()> {
+    let rt = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime");
+    let mut start: SystemTime;
+    // let sounds_directory = audio::get_sound_directory().unwrap();
+    let frame_length: usize = 512; // default for every wake-word engine
+    let sample_rate: usize = 16000;
+    let mut frame_buffer: Vec<i16> = vec![0; frame_length];
+
+    // NOTE(review): `audio_buffer`, `vad_state` and `silence_threshold` below are
+    // never read in this file.
+
+    // ring buffer: keep last 2 seconds of audio
+    let mut audio_buffer = AudioRingBuffer::new(2.0, frame_length, sample_rate);
+
+    // VAD state
+    let mut vad_state = VadState::WaitingForVoice;
+    let mut silence_frames: u32 = 0;
+
+    // how many frames of silence before we consider speech ended
+    // 1.5 seconds = 1.5 * 16000 / 512 = 46.875, truncated to 46 frames
+    // @TODO: Put this to config
+    let silence_threshold: u32 = ((1.5 * sample_rate as f32) / frame_length as f32) as u32;
+
+    // play some startup phrase
+    // audio::play_sound(&sounds_directory.join("run.wav"));
+    voices::play_greet();
+
+    // start recording
+    match recorder::start_recording() {
+        Ok(_) => info!("Recording started."),
+        Err(_) => {
+            error!("Cannot start recording.");
+            return Err(()); // quit
+        }
+    }
+
+    // notify GUI we're ready
+    ipc::send(IpcEvent::Idle);
+
+    // DEBUG counter
+    let mut frame_count: u32 = 0;
+
+    // the loop
+    'wake_word: loop {
+        // check for stop signal
+        if should_stop() {
+            info!("Stop signal received, shutting down...");
+            voices::play_goodbye();
+            ipc::send(IpcEvent::Stopping);
+            break;
+        }
+
+        // check for text commands
+        if let Ok(text) = text_cmd_rx.try_recv() {
+            process_text_command(&text, &rt);
+            continue 'wake_word;
+        }
+
+        // read from microphone
+        recorder::read_microphone(&mut frame_buffer);
+
+        // DEBUG: check raw audio
+        frame_count += 1;
+        let raw_rms = calculate_rms(&frame_buffer);
+
+        if frame_count % 100 == 0 {
+            info!("DEBUG [{}]: raw_rms={:.0}", frame_count, raw_rms);
+        }
+
+        // check if we're getting any audio at all
+        if frame_count == 100 && raw_rms < 10.0 {
+            warn!("WARNING: Microphone appears to be silent! RMS={:.0}", raw_rms);
+        }
+
+        // process audio (gain -> noise suppression -> VAD)
+        let processed = audio_processing::process(&frame_buffer);
+
+        if frame_count % 100 == 0 {
+            info!("DEBUG [{}]: is_voice={}, vad_conf={:.2}, processed_rms={:.0}",
+                frame_count,
+                processed.is_voice,
+                processed.vad_confidence,
+                calculate_rms(&processed.samples)
+            );
+        }
+
+        // skip if no voice detected (vad)
+        if !processed.is_voice {
+            continue 'wake_word;
+        }
+
+        // DEBUG: we passed VAD
+        if frame_count % 50 == 0 {
+            info!("DEBUG: Voice detected, checking wake word...");
+        }
+
+        // recognize wake-word
+        match listener::data_callback(&frame_buffer) {
+            Some(_keyword_index) => {
+                // notify GUI
+                ipc::send(IpcEvent::WakeWordDetected);
+
+                // reset some things
+                stt::reset_wake_recognizer();
+                stt::reset_speech_recognizer();
+                audio_processing::reset();
+
+                // wake-word activated, process further commands
+                // capture current time
+                start = SystemTime::now();
+                silence_frames = 0;
+
+                // play some reply phrase
+                // @TODO. Make it via commands or upcoming events system.
+                voices::play_reply();
+
+                // notify GUI we're listening
+                ipc::send(IpcEvent::Listening);
+
+                // wait for voice commands
+                'voice_recognition: loop {
+                    // check for stop
+                    if should_stop() {
+                        break 'wake_word;
+                    }
+
+                    // read from microphone
+                    recorder::read_microphone(&mut frame_buffer);
+
+                    // process first
+                    let processed = audio_processing::process(&frame_buffer);
+
+                    // detect silence, return to wake-word if silence
+                    if processed.is_voice {
+                        silence_frames = 0;
+                    } else {
+                        silence_frames += 1;
+                        if silence_frames > config::VAD_SILENCE_FRAMES * 2 {
+                            info!("Long silence detected, returning to wake word mode.");
+                            break 'voice_recognition;
+                        }
+                    }
+
+                    // stt part (without partials)
+                    if let Some(mut recognized_voice) = stt::recognize(&frame_buffer, false) {
+                        // something was recognized
+                        info!("Recognized voice: {}", recognized_voice);
+
+                        // notify GUI
+                        ipc::send(IpcEvent::SpeechRecognized {
+                            text: recognized_voice.clone(),
+                        });
+
+                        // filter recognized voice
+                        // @TODO. Better recognized voice filtration.
+                        recognized_voice = recognized_voice.to_lowercase();
+
+                        // answer again if it's activation phrase repeated
+                        if recognized_voice.contains(config::VOSK_FETCH_PHRASE) {
+                            info!("Wake word detected during chaining, reactivating...");
+
+                            // play greet sound
+                            // audio::play_sound(&sounds_directory.join(format!(
+                            //     "{}.wav",
+                            //     config::ASSISTANT_GREET_PHRASES
+                            //         .choose(&mut rand::thread_rng())
+                            //         .unwrap()
+                            // )));
+                            voices::play_reply();
+
+                            // reset timer and continue listening
+                            start = SystemTime::now();
+                            silence_frames = 0;
+                            stt::reset_speech_recognizer();
+
+                            ipc::send(IpcEvent::Listening);
+                            continue 'voice_recognition;
+                        }
+
+                        // filter out activation phrase from command
+                        for tbr in config::ASSISTANT_PHRASES_TBR {
+                            recognized_voice = recognized_voice.replace(tbr, "");
+                        }
+                        recognized_voice = recognized_voice.trim().into();
+
+                        // skip if nothing left after filtering (*evil laugh*)
+                        if recognized_voice.is_empty() {
+                            continue 'voice_recognition;
+                        }
+
+                        // execute command (shared executor)
+                        execute_command(&recognized_voice, &rt);
+
+                        // return to wake-word listening after command execution (no matter successful or not)
+                        break 'voice_recognition;
+                    }
+
+                    // only recognize voice for a certain period of time
+                    match start.elapsed() {
+                        Ok(elapsed) if elapsed > config::CMS_WAIT_DELAY => {
+                            // return to wake-word listening after N seconds
+                            break 'voice_recognition;
+                        }
+                        _ => (),
+                    }
+                }
+
+                // reset things once the listening session is over; doing this on every
+                // frame inside the loop above reset audio processing between frames
+                // and sent Idle to the GUI while it was still listening
+                stt::reset_wake_recognizer();
+                audio_processing::reset();
+                ipc::send(IpcEvent::Idle);
+            }
+            None => (),
+        }
+    }
+
+    // cleanup
+    recorder::stop_recording().ok();
+    ipc::send(IpcEvent::Stopping);
+
+    Ok(())
+}
+
+
+/// Executes a text command received from the GUI, filtered the same way as recognized speech.
+fn process_text_command(text: &str, rt: &tokio::runtime::Runtime) {
+    info!("Processing text command: {}", text);
+
+    ipc::send(IpcEvent::SpeechRecognized { text: text.to_string() });
+
+    // filter text same as voice
+    let mut filtered = text.to_lowercase();
+    for tbr in config::ASSISTANT_PHRASES_TBR {
+        filtered = filtered.replace(tbr, "");
+    }
+    let filtered = filtered.trim();
+
+    if filtered.is_empty() {
+        ipc::send(IpcEvent::Idle);
+        return;
+    }
+
+    execute_command(filtered, rt);
+}
+
+/// Shared command execution (text & voice): resolves `text` to a command, runs it, reports via IPC.
+fn execute_command(text: &str, rt: &tokio::runtime::Runtime) {
+    let commands_list = match COMMANDS_LIST.get() {
+        Some(c) => c,
+        None => {
+            ipc::send(IpcEvent::Error { message: "Commands not loaded".to_string() });
+            ipc::send(IpcEvent::Idle);
+            return;
+        }
+    };
+
+    // let sounds_directory = audio::get_sound_directory().unwrap();
+
+    // try intent recognition first, fallback to levenshtein
+    let cmd_result = if let Some((intent_id, confidence)) =
+        rt.block_on(intent::classify(text))
+    {
+        info!("Intent recognized: {} (confidence: {:.2})", intent_id, confidence);
+        intent::get_command_by_intent(commands_list, &intent_id)
+    } else {
+        info!("Intent not recognized, trying levenshtein fallback...");
+        commands::fetch_command(text, commands_list)
+    };
+
+    if let Some((cmd_path, cmd_config)) = cmd_result {
+        info!("Command found: {:?}", cmd_path);
+
+        match commands::execute_command(&cmd_path, &cmd_config) {
+            Ok(_) => {
+                info!("Command executed successfully");
+                voices::play_ok(); // command executed sound
+                ipc::send(IpcEvent::CommandExecuted {
+                    id: cmd_config.id.clone(),
+                    success: true,
+                });
+            }
+            Err(msg) => {
+                error!("Error executing command: {}", msg);
+                voices::play_error();
+                ipc::send(IpcEvent::CommandExecuted {
+                    id: cmd_config.id.clone(),
+                    success: false,
+                });
+                ipc::send(IpcEvent::Error { message: msg.to_string() });
+            }
+        }
+    } else {
+        info!("No command found for: {}", text);
+        // play "not understood" sound
+        // audio::play_sound(&sounds_directory.join("not_understand.wav"));
+        voices::play_not_found();
+        ipc::send(IpcEvent::Error {
+            message: format!("Command not found: {}", text)
+        });
+    }
+
+    ipc::send(IpcEvent::Idle);
+}
+
+
+// NOTE(review): no-op that is not called anywhere in this file.
+fn keyword_callback(keyword_index: i32) {}
+
+/// Plays the goodbye phrase, notifies the GUI and exits the process with `code`.
+pub fn close(code: i32) {
+    info!("Closing application.");
+    voices::play_goodbye();
+    ipc::send(IpcEvent::Stopping);
+    std::process::exit(code);
+}
+
+/// Root-mean-square amplitude of `samples`; `0.0` for an empty slice.
+fn calculate_rms(samples: &[i16]) -> f32 {
+    if samples.is_empty() { return 0.0; }
+    let sum: f64 = samples.iter().map(|&s| (s as f64).powi(2)).sum();
+    (sum / samples.len() as f64).sqrt() as f32
+}
diff --git a/crates/jarvis-app/src/app.rs b/crates/jarvis-app/src/app.rs
index ea2db38..f20430f 100644
--- a/crates/jarvis-app/src/app.rs
+++ b/crates/jarvis-app/src/app.rs
@@ -1,43 +1,52 @@
 use std::sync::mpsc::Receiver;
 use std::time::SystemTime;
 
-use jarvis_core::{audio, audio_processing, commands, config,  listener, recorder, stt, COMMANDS_LIST, intent, voices, ipc::{self, IpcEvent}};
-use rand::prelude::*;
+use jarvis_core::{audio_buffer::AudioRingBuffer, audio_processing, commands, config, listener, recorder, stt, COMMANDS_LIST, intent, voices, ipc::{self, IpcEvent}};
 
 use crate::should_stop;
 
+// VAD state machine
+#[derive(Debug, Clone, Copy, PartialEq)]
+enum VadState {
+    WaitingForVoice,
+    VoiceActive,
+}
+
 pub fn start(text_cmd_rx: Receiver<String>) -> Result<(), ()> {
-    // start the loop
     main_loop(text_cmd_rx)
 }
 
 fn main_loop(text_cmd_rx: Receiver<String>) -> Result<(), ()> {
     let rt = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime");
-    let mut start: SystemTime;
-    // let sounds_directory = audio::get_sound_directory().unwrap();
-    let frame_length: usize = 512; // default for every wake-word engine
+    let frame_length: usize = 512;
+    let sample_rate: usize = 16000;
     let mut frame_buffer: Vec<i16> = vec![0; frame_length];
+    
+    // ring buffer: keeps last 2 seconds of audio (pre-roll)
+    let mut audio_buffer = AudioRingBuffer::new(2.0, frame_length, sample_rate);
+    
+    // VAD state
+    let mut vad_state = VadState::WaitingForVoice;
     let mut silence_frames: u32 = 0;
-
-    // play some startup phrase
-    // audio::play_sound(&sounds_directory.join("run.wav"));
+    
+    // how many frames of silence before we consider speech ended
+    // 1.5 seconds = 1.5 * (16000 / 512) ≈ 47 frames
+    let silence_threshold: u32 = ((1.5 * sample_rate as f32) / frame_length as f32) as u32;
+    
     voices::play_greet();
 
-    // start recording
     match recorder::start_recording() {
         Ok(_) => info!("Recording started."),
         Err(_) => {
             error!("Cannot start recording.");
-            return Err(()); // quit
+            return Err(());
         }
     }
 
-    // notify GUI we're ready
     ipc::send(IpcEvent::Idle);
 
-    // the loop
+    // ### WAKE WORD DETECTION LOOP
     'wake_word: loop {
-        // check for stop signal
         if should_stop() {
             info!("Stop signal received, shutting down...");
             voices::play_goodbye();
@@ -45,145 +54,90 @@ fn main_loop(text_cmd_rx: Receiver<String>) -> Result<(), ()> {
             break;
         }
 
-        // check for text commands
         if let Ok(text) = text_cmd_rx.try_recv() {
             process_text_command(&text, &rt);
             continue 'wake_word;
         }
 
-        // read from microphone
         recorder::read_microphone(&mut frame_buffer);
-
-        // process audio (gain -> noise suppression -> VAD)
         let processed = audio_processing::process(&frame_buffer);
-
-        // skip if no voice detected (vad)
-        if !processed.is_voice {
-            continue 'wake_word;
-        }
-
-        // recognize wake-word
-        match listener::data_callback(&frame_buffer) {
-            Some(_keyword_index) => {
-                // notify GUI
-                ipc::send(IpcEvent::WakeWordDetected);
-
-                // reset some things
-                stt::reset_wake_recognizer();
-                stt::reset_speech_recognizer();
-                audio_processing::reset();
-
-                // wake-word activated, process further commands
-                // capture current time
-                start = SystemTime::now();
-                silence_frames = 0;
-
-                // play some reply phrase
-                // @TODO. Make it via commands or upcoming events system.
-                voices::play_reply();
-
-
-                // notify GUI we're listening
-                ipc::send(IpcEvent::Listening);
-
-                // wait for voice commands
-                'voice_recognition: loop {
-                    // check for stop
-                    if should_stop() {
-                        break 'wake_word;
-                    }
-
-                    // read from microphone
-                    recorder::read_microphone(&mut frame_buffer);
-
-                    // process first
-                    let processed = audio_processing::process(&frame_buffer);
-
-                    // detect silence, return to wake-word if silence
-                    if processed.is_voice {
-                        silence_frames = 0;
-                    } else {
-                        silence_frames += 1;
-                        if silence_frames > config::VAD_SILENCE_FRAMES * 2 {
-                            info!("Long silence detected, returning to wake word mode.");
-                            break 'voice_recognition;
-                        }
-                    }
-
-                    // stt part (without partials)
-                    if let Some(mut recognized_voice) = stt::recognize(&frame_buffer, false) {
-                        // something was recognized
-                        info!("Recognized voice: {}", recognized_voice);
-
-                        // notify GUI
-                        ipc::send(IpcEvent::SpeechRecognized {
-                            text: recognized_voice.clone(),
-                        });
-
-                        // filter recognized voice
-                        // @TODO. Better recognized voice filtration.
-                        recognized_voice = recognized_voice.to_lowercase();
-
-                        // answer again if it's activation phrase repeated
-                        if recognized_voice.contains(config::VOSK_FETCH_PHRASE) {
-                            info!("Wake word detected during chaining, reactivating...");
-                            
-                            // play greet sound
-                            // audio::play_sound(&sounds_directory.join(format!(
-                            //     "{}.wav",
-                            //     config::ASSISTANT_GREET_PHRASES
-                            //         .choose(&mut rand::thread_rng())
-                            //         .unwrap()
-                            // )));
-                            voices::play_reply();
-                            
-                            // reset timer and continue listening
-                            start = SystemTime::now();
-                            silence_frames = 0;
-                            stt::reset_speech_recognizer();
-
-                            ipc::send(IpcEvent::Listening);
-                            continue 'voice_recognition;
-                        }
-
-                        // filter out activation phrase from command
-                        for tbr in config::ASSISTANT_PHRASES_TBR {
-                            recognized_voice = recognized_voice.replace(tbr, "");
-                        }
-                        recognized_voice = recognized_voice.trim().into();
-
-                        // skip if nothing left after filtering (*evil laugh*)
-                        if recognized_voice.is_empty() {
-                            continue 'voice_recognition;
-                        }
-
-                        // execute command (shared executor)
-                        execute_command(&recognized_voice, &rt);
-
-                        // return to wake-word listening after command execution (no matter successful or not)
-                        break 'voice_recognition;
-                    }
-
-                    // only recognize voice for a certain period of time
-                    match start.elapsed() {
-                        Ok(elapsed) if elapsed > config::CMS_WAIT_DELAY => {
-                            // return to wake-word listening after N seconds
-                            break 'voice_recognition;
-                        }
-                        _ => (),
-                    }
-
-                    // reset things
-                    stt::reset_wake_recognizer();
-                    audio_processing::reset();
-                    ipc::send(IpcEvent::Idle);
-                }
-            }
-            None => (),
+
+        // Feed the wake-word detector; `wake_detected` tells whether it fired on this pass.
+        let wake_detected = match vad_state {
+            VadState::WaitingForVoice => {
+                // always buffer audio
+                audio_buffer.push(&frame_buffer);
+
+                if processed.is_voice {
+                    // voice started! flush buffer to Vosk
+                    info!("VAD: Voice started, flushing {} buffered frames", audio_buffer.len());
+
+                    vad_state = VadState::VoiceActive;
+                    silence_frames = 0;
+
+                    // The pre-roll can already contain the complete wake word, so the
+                    // detector's answer must not be discarded while replaying it.
+                    let mut detected = false;
+                    for buffered_frame in audio_buffer.drain_all() {
+                        if listener::data_callback(&buffered_frame).is_some() {
+                            detected = true;
+                            break;
+                        }
+                    }
+                    detected
+                } else {
+                    false
+                }
+            }
+
+            // feed the live frame to the wake word detector
+            VadState::VoiceActive => listener::data_callback(&frame_buffer).is_some(),
+        };
+
+        if wake_detected {
+            // WAKE WORD DETECTED!
+            info!("Wake word activated!");
+            ipc::send(IpcEvent::WakeWordDetected);
+
+            stt::reset_wake_recognizer();
+            stt::reset_speech_recognizer();
+            audio_processing::reset();
+
+            voices::play_reply();
+            ipc::send(IpcEvent::Listening);
+
+            // enter voice recognition mode
+            recognize_command(&mut frame_buffer, &rt, frame_length, sample_rate);
+
+            // reset state after command
+            vad_state = VadState::WaitingForVoice;
+            silence_frames = 0;
+            audio_buffer.clear();
+            stt::reset_wake_recognizer();
+            audio_processing::reset();
+            ipc::send(IpcEvent::Idle);
+
+            continue 'wake_word;
+        }
+
+        // track silence (only meaningful once voice is active)
+        if vad_state == VadState::VoiceActive {
+            if processed.is_voice {
+                silence_frames = 0;
+            } else {
+                silence_frames += 1;
+
+                if silence_frames > silence_threshold {
+                    // silence timeout, back to waiting
+                    debug!("VAD: Silence timeout, returning to wait state");
+                    vad_state = VadState::WaitingForVoice;
+                    silence_frames = 0;
+                    stt::reset_wake_recognizer();
+                }
+            }
         }
     }
 
-    // cleanup
     recorder::stop_recording().ok();
     ipc::send(IpcEvent::Stopping);
 
@@ -191,13 +133,147 @@ fn main_loop(text_cmd_rx: Receiver<String>) -> Result<(), ()> {
 }
 
 
-// process text command from GUI
+/// Listens for a spoken command after the wake word and executes it.
+///
+/// Mirrors the wake-word loop: frames go into a pre-roll ring buffer until the
+/// VAD reports voice, then the pre-roll is replayed into the speech recognizer
+/// and later frames are fed to it directly.
+///
+/// Returns when a command ran without requesting chaining, on prolonged
+/// silence after speech, on the `config::CMS_WAIT_DELAY` timeout, or when a
+/// stop was requested. `frame_buffer` is scratch space overwritten by every
+/// microphone read; `frame_length` and `sample_rate` size the pre-roll buffer
+/// and the silence threshold.
+fn recognize_command(
+    frame_buffer: &mut [i16],
+    rt: &tokio::runtime::Runtime,
+    frame_length: usize,
+    sample_rate: usize,
+) {
+    let mut audio_buffer = AudioRingBuffer::new(2.0, frame_length, sample_rate);
+    let mut vad_state = VadState::WaitingForVoice;
+    let mut silence_frames: u32 = 0;
+    let mut start = SystemTime::now();
+
+    // longer silence threshold for commands (user might pause to think)
+    // 2 seconds
+    let silence_threshold: u32 = ((2.0 * sample_rate as f32) / frame_length as f32) as u32;
+
+    loop {
+        if crate::should_stop() {
+            return;
+        }
+
+        recorder::read_microphone(frame_buffer);
+        let processed = audio_processing::process(frame_buffer);
+
+        let recognized = match vad_state {
+            VadState::WaitingForVoice => {
+                audio_buffer.push(frame_buffer);
+
+                if processed.is_voice {
+                    vad_state = VadState::VoiceActive;
+                    silence_frames = 0;
+
+                    // Replay the pre-roll into STT. The recognizer may return a
+                    // result while replaying, so keep it instead of dropping it;
+                    // the replay stops at the first result.
+                    audio_buffer
+                        .drain_all()
+                        .into_iter()
+                        .find_map(|buffered_frame| stt::recognize(&buffered_frame, false))
+                } else {
+                    None
+                }
+            }
+
+            // feed the live frame to STT
+            VadState::VoiceActive => stt::recognize(frame_buffer, false),
+        };
+
+        if let Some(mut recognized_voice) = recognized {
+            info!("Recognized voice: {}", recognized_voice);
+
+            ipc::send(IpcEvent::SpeechRecognized {
+                text: recognized_voice.clone(),
+            });
+
+            recognized_voice = recognized_voice.to_lowercase();
+
+            // check if wake word repeated (reactivate)
+            if recognized_voice.contains(config::VOSK_FETCH_PHRASE) {
+                info!("Wake word detected during chaining, reactivating...");
+                voices::play_reply();
+                stt::reset_speech_recognizer();
+                ipc::send(IpcEvent::Listening);
+
+                // reset for next command
+                vad_state = VadState::WaitingForVoice;
+                silence_frames = 0;
+                start = SystemTime::now();
+                audio_buffer.clear();
+                continue;
+            }
+
+            // filter activation phrases
+            for tbr in config::ASSISTANT_PHRASES_TBR {
+                recognized_voice = recognized_voice.replace(tbr, "");
+            }
+            recognized_voice = recognized_voice.trim().to_string();
+
+            if recognized_voice.is_empty() {
+                continue;
+            }
+
+            // execute command and check if we should chain
+            let should_chain = execute_command(&recognized_voice, rt);
+
+            if should_chain {
+                // chain: reset and continue listening
+                info!("Chaining enabled, continuing to listen...");
+                stt::reset_speech_recognizer();
+                vad_state = VadState::WaitingForVoice;
+                silence_frames = 0;
+                start = SystemTime::now();
+                audio_buffer.clear();
+                ipc::send(IpcEvent::Listening);
+                continue;
+            } else {
+                // no chain: return to wake word
+                return;
+            }
+        }
+
+        // track silence (only once voice has started)
+        if vad_state == VadState::VoiceActive {
+            if processed.is_voice {
+                silence_frames = 0;
+            } else {
+                silence_frames += 1;
+
+                if silence_frames > silence_threshold {
+                    info!("Long silence detected, returning to wake word mode.");
+                    return;
+                }
+            }
+        }
+
+        // timeout
+        if let Ok(elapsed) = start.elapsed() {
+            if elapsed > config::CMS_WAIT_DELAY {
+                info!("Command timeout, returning to wake word mode.");
+                return;
+            }
+        }
+    }
+}
+
+
 fn process_text_command(text: &str, rt: &tokio::runtime::Runtime) {
     info!("Processing text command: {}", text);
     
     ipc::send(IpcEvent::SpeechRecognized { text: text.to_string() });
     
-    // filter text same as voice
     let mut filtered = text.to_lowercase();
     for tbr in config::ASSISTANT_PHRASES_TBR {
         filtered = filtered.replace(tbr, "");
@@ -209,23 +267,22 @@ fn process_text_command(text: &str, rt: &tokio::runtime::Runtime) {
         return;
     }
     
+    // text commands never chain
     execute_command(filtered, rt);
 }
 
-// shared command execution logic (manual & voice)
-fn execute_command(text: &str, rt: &tokio::runtime::Runtime) {
+
+// Execute command, returns true if chaining should continue
+fn execute_command(text: &str, rt: &tokio::runtime::Runtime) -> bool {
     let commands_list = match COMMANDS_LIST.get() {
         Some(c) => c,
         None => {
             ipc::send(IpcEvent::Error { message: "Commands not loaded".to_string() });
             ipc::send(IpcEvent::Idle);
-            return;
+            return false;
         }
     };
     
-    // let sounds_directory = audio::get_sound_directory().unwrap();
-    
-    // try intent recognition first, fallback to levenshtein
     let cmd_result = if let Some((intent_id, confidence)) = 
         rt.block_on(intent::classify(text)) 
     {
@@ -240,13 +297,15 @@ fn execute_command(text: &str, rt: &tokio::runtime::Runtime) {
         info!("Command found: {:?}", cmd_path);
         
         match commands::execute_command(&cmd_path, &cmd_config) {
-            Ok(_) => {
+            Ok(chain) => {
                 info!("Command executed successfully");
-                voices::play_ok(); // command executed sound
+                voices::play_ok();
                 ipc::send(IpcEvent::CommandExecuted {
                     id: cmd_config.id.clone(),
                     success: true,
                 });
+                ipc::send(IpcEvent::Idle);
+                return chain; // return chain status from command
             }
             Err(msg) => {
                 error!("Error executing command: {}", msg);
@@ -260,8 +319,6 @@ fn execute_command(text: &str, rt: &tokio::runtime::Runtime) {
         }
     } else {
         info!("No command found for: {}", text);
-        // play "not understood" sound
-        // audio::play_sound(&sounds_directory.join("not_understand.wav"));
         voices::play_not_found();
         ipc::send(IpcEvent::Error { 
             message: format!("Command not found: {}", text) 
@@ -269,14 +326,13 @@ fn execute_command(text: &str, rt: &tokio::runtime::Runtime) {
     }
     
     ipc::send(IpcEvent::Idle);
+    false // no chain on error or not found
 }
 
 
-fn keyword_callback(keyword_index: i32) {}
-
 pub fn close(code: i32) {
     info!("Closing application.");
     voices::play_goodbye();
     ipc::send(IpcEvent::Stopping);
     std::process::exit(code);
-}
+}
\ No newline at end of file
diff --git a/crates/jarvis-core/src/audio_buffer.rs b/crates/jarvis-core/src/audio_buffer.rs
new file mode 100644
index 0000000..ad8f915
--- /dev/null
+++ b/crates/jarvis-core/src/audio_buffer.rs
@@ -0,0 +1,81 @@
+use std::collections::VecDeque;
+
+/// Fixed-capacity FIFO of audio frames that keeps only the most recent ones.
+///
+/// Once the buffer holds `max_frames` frames, every [`push`](Self::push) evicts
+/// the oldest frame before storing the new one.
+#[derive(Debug, Clone)]
+pub struct AudioRingBuffer {
+    // buffered frames, oldest at the front
+    buffer: VecDeque<Vec<i16>>,
+    // maximum number of frames kept at any time
+    max_frames: usize,
+}
+
+impl AudioRingBuffer {
+    /// Creates a buffer that holds about `seconds` of audio, given that each
+    /// frame carries `frame_size` samples recorded at `sample_rate` samples per second.
+    ///
+    /// The capacity is `sample_rate * seconds / frame_size`, rounded down. A zero
+    /// `frame_size`, or a negative or NaN `seconds`, gives a zero-capacity buffer
+    /// that stores nothing.
+    ///
+    /// # Panics
+    ///
+    /// May panic if the resulting capacity is too large to allocate (for example
+    /// when `seconds` is infinite).
+    pub fn new(seconds: f32, frame_size: usize, sample_rate: usize) -> Self {
+        let max_frames = if frame_size == 0 {
+            // no frame can hold audio; avoid dividing by zero
+            0
+        } else {
+            // divide in floating point so the only truncation is the final cast
+            // (float-to-int `as` casts saturate and map NaN to 0)
+            (sample_rate as f64 * f64::from(seconds) / frame_size as f64) as usize
+        };
+
+        Self {
+            buffer: VecDeque::with_capacity(max_frames),
+            max_frames,
+        }
+    }
+
+    /// Appends a copy of `frame`, dropping the oldest frame first if the buffer is full.
+    ///
+    /// Does nothing when the buffer was created with zero capacity.
+    pub fn push(&mut self, frame: &[i16]) {
+        if self.max_frames == 0 {
+            return;
+        }
+
+        // when full, recycle the evicted frame's allocation instead of allocating again
+        let mut slot = if self.buffer.len() >= self.max_frames {
+            self.buffer.pop_front().unwrap_or_default()
+        } else {
+            Vec::with_capacity(frame.len())
+        };
+        slot.clear();
+        slot.extend_from_slice(frame);
+        self.buffer.push_back(slot);
+    }
+
+    /// Removes every buffered frame and returns them, oldest first.
+    pub fn drain_all(&mut self) -> Vec<Vec<i16>> {
+        self.buffer.drain(..).collect()
+    }
+
+    /// Returns the number of frames currently buffered.
+    pub fn len(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Returns `true` if no frames are buffered.
+    pub fn is_empty(&self) -> bool {
+        self.buffer.is_empty()
+    }
+
+    /// Discards every buffered frame.
+    pub fn clear(&mut self) {
+        self.buffer.clear();
+    }
+}
\ No newline at end of file
diff --git a/crates/jarvis-core/src/config.rs b/crates/jarvis-core/src/config.rs
index b9c8cc4..7b7fe6f 100644
--- a/crates/jarvis-core/src/config.rs
+++ b/crates/jarvis-core/src/config.rs
@@ -163,8 +163,8 @@ pub const DEFAULT_VAD: VadBackend = VadBackend::Energy;
 pub const DEFAULT_GAIN_NORMALIZER: bool = false;
 
 // VAD settings
-pub const VAD_ENERGY_THRESHOLD: f32 = 500.0;  // RMS threshold for energy-based VAD
-pub const VAD_NNNOISELESS_THRESHOLD: f32 = 0.5;  // probability threshold for nnnoiseless
+pub const VAD_ENERGY_THRESHOLD: f32 = 100.0;  // RMS threshold for energy-based VAD
+pub const VAD_NNNOISELESS_THRESHOLD: f32 = 0.8;  // probability threshold for nnnoiseless
 pub const VAD_SILENCE_FRAMES: u32 = 15;  // frames of silence before speech end (~480ms)
 
 // gain normalizer settings
diff --git a/crates/jarvis-core/src/i18n/locales/en.ftl b/crates/jarvis-core/src/i18n/locales/en.ftl
index f4adde8..88a9922 100644
--- a/crates/jarvis-core/src/i18n/locales/en.ftl
+++ b/crates/jarvis-core/src/i18n/locales/en.ftl
@@ -63,7 +63,7 @@ settings-stt-engine = Speech recognition
 settings-intent-engine = Intent recognition
 settings-intent-engine-desc = Select neural network for command recognition.
 settings-noise-suppression = Noise suppression
-settings-noise-suppression-desc = Reduces background noise.
+settings-noise-suppression-desc = Reduces background noise. May negatively affect recognition.
 settings-vad = Voice detection (VAD)
 settings-vad-desc = Skips silence, saves CPU resources.
 settings-gain-normalizer = Gain normalizer
diff --git a/crates/jarvis-core/src/i18n/locales/ru.ftl b/crates/jarvis-core/src/i18n/locales/ru.ftl
index 9ece828..2a7838e 100644
--- a/crates/jarvis-core/src/i18n/locales/ru.ftl
+++ b/crates/jarvis-core/src/i18n/locales/ru.ftl
@@ -63,7 +63,7 @@ settings-stt-engine = Распознавание речи
 settings-intent-engine = Определение намерения
 settings-intent-engine-desc = Выберите нейросеть для распознавания команд.
 settings-noise-suppression = Шумоподавление
-settings-noise-suppression-desc = Уменьшает фоновый шум.
+settings-noise-suppression-desc = Уменьшает фоновый шум. Может негативно влиять на распознавание.
 settings-vad = Определение голоса (VAD)
 settings-vad-desc = Пропускает тишину, экономит ресурсы CPU.
 settings-gain-normalizer = Нормализация громкости
diff --git a/crates/jarvis-core/src/i18n/locales/ua.ftl b/crates/jarvis-core/src/i18n/locales/ua.ftl
index 3e3e145..fdadd6d 100644
--- a/crates/jarvis-core/src/i18n/locales/ua.ftl
+++ b/crates/jarvis-core/src/i18n/locales/ua.ftl
@@ -63,7 +63,7 @@ settings-stt-engine = Розпізнавання мовлення
 settings-intent-engine = Визначення наміру
 settings-intent-engine-desc = Виберіть нейромережу для розпізнавання команд.
 settings-noise-suppression = Шумозаглушення
-settings-noise-suppression-desc = Зменшує фоновий шум.
+settings-noise-suppression-desc = Зменшує фоновий шум. Може негативно впливати на розпізнавання.
 settings-vad = Визначення голосу (VAD)
 settings-vad-desc = Пропускає тишу, економить ресурси CPU.
 settings-gain-normalizer = Нормалізація гучності
diff --git a/crates/jarvis-core/src/lib.rs b/crates/jarvis-core/src/lib.rs
index d9dbebf..43f5381 100644
--- a/crates/jarvis-core/src/lib.rs
+++ b/crates/jarvis-core/src/lib.rs
@@ -36,6 +36,8 @@ pub mod ipc;
 
 pub mod voices;
 
+pub mod audio_buffer;
+
 // shared statics
 // pub static APP_DIR: Lazy<PathBuf> = Lazy::new(|| std::env::current_dir().unwrap());
 pub static APP_DIR: Lazy<PathBuf> = Lazy::new(|| {