diff --git a/.gitignore b/.gitignore index 203bccd..ca439dc 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,6 @@ tree.txt # Ignore packaged crates *.crate + +# Ignore AI models +/resources/models/* diff --git a/Cargo.lock b/Cargo.lock index 6187876..334cbb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3305,12 +3305,15 @@ dependencies = [ "kira", "log", "mlua", + "ndarray", "nnnoiseless", "once_cell", + "ort", "parking_lot", "platform-dirs", "pv_recorder", "rand 0.8.5", + "regex", "reqwest 0.13.1", "rodio", "rustpotter", @@ -3320,6 +3323,7 @@ dependencies = [ "serde_yaml", "sha2", "tempfile", + "tokenizers", "tokio", "tokio-tungstenite", "toml 0.9.11+spec-1.1.0", diff --git a/Cargo.toml b/Cargo.toml index 940a05c..a0e87a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,4 +45,10 @@ mlua = { version = "0.11.5", features = ["lua55", "vendored", "async", "serde"] reqwest = { version = "0.13.1", features = ["blocking", "json"] } tempfile = "^3.24" winrt-notification = "0.5" -fastembed = { version = "^5.8.1", default-features = false, features = ["ort-download-binaries"] } \ No newline at end of file + +fastembed = { version = "^5.8.1", default-features = false, features = ["ort-download-binaries"] } +ort = { version = "=2.0.0-rc.11" } +ndarray = "0.17" +tokenizers = { version = "0.22", default-features = false } +regex = "1" + diff --git a/crates/jarvis-app/src/app.rs b/crates/jarvis-app/src/app.rs index bd482af..2c73f48 100644 --- a/crates/jarvis-app/src/app.rs +++ b/crates/jarvis-app/src/app.rs @@ -1,7 +1,7 @@ use std::sync::mpsc::Receiver; use std::time::SystemTime; -use jarvis_core::{audio_buffer::AudioRingBuffer, audio_processing, commands, config, listener, recorder, stt, COMMANDS_LIST, intent, voices, ipc::{self, IpcEvent}, i18n}; +use jarvis_core::{audio_buffer::AudioRingBuffer, audio_processing, commands, config, listener, recorder, stt, COMMANDS_LIST, intent, voices, ipc::{self, IpcEvent}, i18n, slots}; use rand::seq::SliceRandom; use crate::should_stop; @@ -23,8 +23,8 @@ fn main_loop(text_cmd_rx: Receiver) -> Result<(), ()> { let sample_rate: usize = 16000; let mut frame_buffer: Vec = vec![0; frame_length]; - // ring buffer: keeps last 2 seconds of audio (pre-roll) - let mut audio_buffer = AudioRingBuffer::new(2.0, frame_length, sample_rate); + // ring buffer: keeps last 5 seconds of audio (pre-roll) + let mut audio_buffer = AudioRingBuffer::new(5.0, frame_length, sample_rate); // VAD state let mut vad_state = VadState::WaitingForVoice; @@ -162,8 +162,8 @@ fn recognize_command( let mut first_recognition = prefed_audio; // longer silence threshold for commands (user might pause to think) - // 2 seconds - let silence_threshold: u32 = ((2.0 * sample_rate as f32) / frame_length as f32) as u32; + // 5 seconds + let silence_threshold: u32 = ((5.0 * sample_rate as f32) / frame_length as f32) as u32; loop { if crate::should_stop() { @@ -262,6 +262,11 @@ fn recognize_command( recognized_voice = recognized_voice.trim().to_string(); + if recognized_voice.len() < 5 { + debug!("Ignoring too short recognition: '{}'", recognized_voice); + continue; + } + if recognized_voice.is_empty() { continue; } @@ -360,7 +365,18 @@ fn execute_command(text: &str, rt: &tokio::runtime::Runtime) -> bool { if let Some((cmd_path, cmd_config)) = cmd_result { info!("Command found: {:?}", cmd_path); - match commands::execute_command(&cmd_path, &cmd_config, Some(&text)) { + // extract slots if needed + let extracted_slots = if !cmd_config.slots.is_empty() { + let s = slots::extract(text, &cmd_config.slots); + if !s.is_empty() { + info!("Extracted slots: {:?}", s); + } + Some(s) + } else { + None + }; + + match commands::execute_command(&cmd_path, &cmd_config, Some(&text), extracted_slots.as_ref()) { Ok(chain) => { info!("Command executed successfully"); // voices::play_ok(); diff --git a/crates/jarvis-app/src/main.rs b/crates/jarvis-app/src/main.rs index 9f26262..63d36ca 100644 --- a/crates/jarvis-app/src/main.rs +++ b/crates/jarvis-app/src/main.rs @@ -1,3 +1,4 @@ +use jarvis_core::slots; use parking_lot::RwLock; use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; @@ -105,6 +106,9 @@ fn main() -> Result<(), String> { } }); + // init slots parsing engine + slots::init().map_err(|e| error!("Slot extraction init failed: {}", e)).ok(); + // init audio processing info!("Initializing audio processing..."); if let Err(e) = audio_processing::init() { diff --git a/crates/jarvis-core/Cargo.toml b/crates/jarvis-core/Cargo.toml index e120000..4bfda46 100644 --- a/crates/jarvis-core/Cargo.toml +++ b/crates/jarvis-core/Cargo.toml @@ -43,13 +43,21 @@ reqwest = { workspace = true, optional = true } tempfile.workspace = true fastembed = { workspace = true, optional = true } +ort = { workspace = true, optional = true } +ndarray = { workspace = true, optional = true } +tokenizers = { workspace = true, optional = true } +regex = { workspace = true, optional = true } [target.'cfg(windows)'.dependencies] winrt-notification = { workspace = true, optional = true } [features] default = ["jarvis_app"] -jarvis_app = ["vosk", "intent-classifier", "fastembed", "tokio", "nnnoiseless", "tokio-tungstenite", "futures-util", "lua"] +jarvis_app = [ + "vosk", "intent-classifier", "fastembed", "tokio", "nnnoiseless", "tokio-tungstenite", "futures-util", + "lua", + "ort", "ndarray", "tokenizers", "regex",] + intent = ["intent-classifier", "tokio"] lua = ["mlua", "reqwest", "winrt-notification"] lua_only = ["lua", "tokio"] \ No newline at end of file diff --git a/crates/jarvis-core/src/audio_processing/noise_suppression/nnnoiseless.rs b/crates/jarvis-core/src/audio_processing/noise_suppression/nnnoiseless.rs index 6ec4c27..cd8da34 100644 --- a/crates/jarvis-core/src/audio_processing/noise_suppression/nnnoiseless.rs +++ b/crates/jarvis-core/src/audio_processing/noise_suppression/nnnoiseless.rs @@ -45,7 +45,9 @@ impl NnnoiselessNS { } pub fn reset(&mut self) { - self.state = DenoiseState::new(); + // self.state = DenoiseState::new(); + // self.buffer.clear(); + self.buffer.clear(); } } \ No newline at end of file diff --git a/crates/jarvis-core/src/commands.rs b/crates/jarvis-core/src/commands.rs index eced6f5..be59186 100644 --- a/crates/jarvis-core/src/commands.rs +++ b/crates/jarvis-core/src/commands.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::fs; use std::time::Duration; @@ -191,7 +192,8 @@ pub fn execute_cli(cmd: &str, args: &[String]) -> std::io::Result { } } -pub fn execute_command(cmd_path: &PathBuf, cmd_config: &JCommand, phrase: Option<&str>) -> Result { +pub fn execute_command(cmd_path: &PathBuf, cmd_config: &JCommand, phrase: Option<&str>, slots: Option<&HashMap>) -> Result { + // execute command by the type match cmd_config.cmd_type.as_str() { // BRUH @@ -200,7 +202,7 @@ pub fn execute_command(cmd_path: &PathBuf, cmd_config: &JCommand, phrase: Option // LUA command #[cfg(feature = "lua")] "lua" => { - execute_lua_command(cmd_path, cmd_config, phrase) + execute_lua_command(cmd_path, cmd_config, phrase, slots) } // AutoHotkey command @@ -254,8 +256,10 @@ fn execute_lua_command( cmd_path: &PathBuf, cmd_config: &JCommand, phrase: Option<&str>, + slots: Option<&HashMap> ) -> Result { // get script path + let script_name = if cmd_config.script.is_empty() { "script.lua" } else { @@ -270,13 +274,14 @@ fn execute_lua_command( // parse sandbox level let sandbox = SandboxLevel::from_str(&cmd_config.sandbox); - + // create context let context = CommandContext { phrase: phrase.unwrap_or("").to_string(), command_id: cmd_config.id.clone(), command_path: cmd_path.clone(), language: i18n::get_language(), + slots: slots.map(|s| s.clone()), }; // get timeout diff --git a/crates/jarvis-core/src/commands/structs.rs b/crates/jarvis-core/src/commands/structs.rs index a7f2736..18a182d 100644 --- a/crates/jarvis-core/src/commands/structs.rs +++ b/crates/jarvis-core/src/commands/structs.rs @@ -59,6 +59,9 @@ pub struct JCommand { #[serde(default)] pub phrases: HashMap>, + // Slot definitions: slot_name -> how to extract it + #[serde(default)] + pub slots: HashMap, // CACHE #[serde(skip, default)] @@ -90,6 +93,8 @@ impl Clone for JCommand { sounds: self.sounds.clone(), phrases: self.phrases.clone(), + slots: self.slots.clone(), + // empty caches for cloned instance sounds_cache: RwLock::new(HashMap::new()), phrases_cache: RwLock::new(HashMap::new()), @@ -150,4 +155,25 @@ impl JCommand { // fallback to first available map.values().next().cloned().unwrap_or_default() } +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct SlotDefinition { + // Entity label for GLiNER (e.g. "city name", "song title", "number") + // This is a free-form description - GLiNER matches it semantically + #[serde(default)] + pub entity: String, + + // Optional: fallback context words for template-based extraction + // e.g. ["in", "for", "at"] for a city slot + #[serde(default)] + pub context: Vec, +} + +// Extracted slot value passed to commands +#[derive(Debug, Clone, Serialize)] +#[serde(untagged)] +pub enum SlotValue { + Text(String), + Number(f64), } \ No newline at end of file diff --git a/crates/jarvis-core/src/config.rs b/crates/jarvis-core/src/config.rs index 4b88b1e..cf9374d 100644 --- a/crates/jarvis-core/src/config.rs +++ b/crates/jarvis-core/src/config.rs @@ -18,6 +18,7 @@ use rustpotter::{ }; use crate::IntentRecognitionEngine; +use crate::SlotExtractionEngine; use crate::config::structs::NoiseSuppressionBackend; use crate::config::structs::VadBackend; use crate::{APP_CONFIG_DIR, APP_DIRS, APP_LOG_DIR}; @@ -156,6 +157,9 @@ pub const VOSK_SPEECH_PARTIAL_WORDS: bool = false; // IRE (intents recognition) pub const INTENT_CLASSIFIER_MIN_CONFIDENCE: f64 = 0.75; +// SLOTS EXTRACTION +pub const DEFAULT_SLOT_EXTRACTION_ENGINE: SlotExtractionEngine = SlotExtractionEngine::None; + // embedding classifier pub const EMBEDDING_MIN_CONFIDENCE: f64 = 0.70; diff --git a/crates/jarvis-core/src/config/structs.rs b/crates/jarvis-core/src/config/structs.rs index fdcbe99..f64e171 100644 --- a/crates/jarvis-core/src/config/structs.rs +++ b/crates/jarvis-core/src/config/structs.rs @@ -45,6 +45,11 @@ pub enum AudioType { Kira, } +#[derive(Clone, Copy, Serialize, Deserialize, Debug, PartialEq)] +pub enum SlotExtractionEngine { + None, + GLiNER, +} impl fmt::Display for WakeWordEngine { @@ -77,6 +82,12 @@ impl fmt::Display for VadBackend { } } +impl fmt::Display for SlotExtractionEngine { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + // pub enum TextToSpeechEngine {} // pub enum IntentRecognitionEngine {} diff --git a/crates/jarvis-core/src/db/structs.rs b/crates/jarvis-core/src/db/structs.rs index e7a1c8f..1fa7afc 100644 --- a/crates/jarvis-core/src/db/structs.rs +++ b/crates/jarvis-core/src/db/structs.rs @@ -6,6 +6,7 @@ use crate::config::structs::WakeWordEngine; use crate::config::structs::IntentRecognitionEngine; use crate::config::structs::NoiseSuppressionBackend; use crate::config::structs::VadBackend; +use crate::config::structs::SlotExtractionEngine; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Settings { @@ -14,6 +15,10 @@ pub struct Settings { pub wake_word_engine: WakeWordEngine, pub intent_recognition_engine: IntentRecognitionEngine, + + pub slot_extraction_engine: SlotExtractionEngine, + pub gliner_model: String, + pub speech_to_text_engine: SpeechToTextEngine, pub vosk_model: String, @@ -35,6 +40,8 @@ impl Default for Settings { wake_word_engine: config::DEFAULT_WAKE_WORD_ENGINE, intent_recognition_engine: config::DEFAULT_INTENT_RECOGNITION_ENGINE, + slot_extraction_engine: SlotExtractionEngine::None, + gliner_model: String::new(), speech_to_text_engine: config::DEFAULT_SPEECH_TO_TEXT_ENGINE, vosk_model: String::from(""), // auto detect first available diff --git a/crates/jarvis-core/src/gliner_models.rs b/crates/jarvis-core/src/gliner_models.rs new file mode 100644 index 0000000..82a88b4 --- /dev/null +++ b/crates/jarvis-core/src/gliner_models.rs @@ -0,0 +1,130 @@ +use std::collections::HashMap; +use std::fs; +use crate::APP_DIR; + +const GLINER_DIRS: &[&str] = &["gliner_small-v2.1", "gliner_multi-v2.1"]; + +#[derive(Debug, Clone)] +pub struct GlinerModelVariant { + // type id stored in settings, e.g. "int8", "fp16", "full" + pub value: String, + // shown in dropdown, e.g. "int8 (174MB / 332MB)" + pub display_name: String, +} + +// scan both model dirs and return a deduplicated list of model types +pub fn scan_gliner_variants() -> Vec { + let base = APP_DIR.join("resources").join("models"); + + // collect: type -> { dir_name -> size_mb } + let mut types: HashMap> = HashMap::new(); + + for dir_name in GLINER_DIRS { + let onnx_dir = base.join(dir_name).join("onnx"); + if !onnx_dir.exists() { continue; } + + let entries = match fs::read_dir(&onnx_dir) { + Ok(e) => e, + Err(_) => continue, + }; + + for entry in entries.flatten() { + let path = entry.path(); + let file_name = match path.file_name().and_then(|n| n.to_str()) { + Some(n) if n.ends_with(".onnx") => n.to_string(), + _ => continue, + }; + + let variant_type = file_name_to_type(&file_name); + let size_mb = fs::metadata(&path).map(|m| m.len()).unwrap_or(0) / (1024 * 1024); + + types.entry(variant_type) + .or_default() + .insert(dir_name.to_string(), size_mb); + } + } + + let mut result: Vec = types.into_iter().map(|(variant, sizes)| { + let size_str = build_size_string(&sizes); + let label = if variant == "full" { "Full".to_string() } else { variant.clone() }; + GlinerModelVariant { + display_name: format!("{} ({})", label, size_str), + value: variant, + } + }).collect(); + + // sort: full first, then alphabetically + result.sort_by(|a, b| { + let a_full = a.value == "full"; + let b_full = b.value == "full"; + b_full.cmp(&a_full).then_with(|| a.value.cmp(&b.value)) + }); + + result +} + +// "model.onnx" -> "full", "model_int8.onnx" -> "int8" +fn file_name_to_type(name: &str) -> String { + let stem = name.strip_suffix(".onnx").unwrap_or(name); + if stem == "model" { + "full".to_string() + } else if let Some(variant) = stem.strip_prefix("model_") { + variant.to_string() + } else { + stem.to_string() + } +} + +// build size display: "174MB" if only one dir, "small: 174MB / multi: 332MB" if both +fn build_size_string(sizes: &HashMap) -> String { + if sizes.len() == 1 { + let (dir, mb) = sizes.iter().next().unwrap(); + let short = short_dir_name(dir); + return format!("{}: {}MB", short, mb); + } + + let mut parts: Vec = Vec::new(); + for dir_name in GLINER_DIRS { + if let Some(mb) = sizes.get(*dir_name) { + parts.push(format!("{}: {}MB", short_dir_name(dir_name), mb)); + } + } + parts.join(" / ") +} + +fn short_dir_name(dir: &str) -> &str { + if dir.contains("small") { "small" } + else if dir.contains("multi") { "multi" } + else { dir } +} + +// resolve variant type + language into actual file path +// returns (model_dir_path, onnx_file_name) or None +pub fn resolve_model(variant: &str, language: &str) -> Option<(std::path::PathBuf, String)> { + let base = APP_DIR.join("resources").join("models"); + let file_name = type_to_file_name(variant); + + // pick dir based on language + let preferred: &[&str] = match language { + "en" => &["gliner_small-v2.1", "gliner_multi-v2.1"], + _ => &["gliner_multi-v2.1", "gliner_small-v2.1"], + }; + + for dir_name in preferred { + let path = base.join(dir_name).join("onnx").join(&file_name); + if path.exists() { + return Some((base.join(dir_name), file_name)); + } + } + + None +} + +// "full" -> "model.onnx", "int8" -> "model_int8.onnx" +fn type_to_file_name(variant: &str) -> String { + if variant == "full" || variant.is_empty() { + "model.onnx".to_string() + } else { + format!("model_{}.onnx", variant) + } +} \ No newline at end of file diff --git a/crates/jarvis-core/src/i18n/locales/en.ftl b/crates/jarvis-core/src/i18n/locales/en.ftl index 202c797..2237f84 100644 --- a/crates/jarvis-core/src/i18n/locales/en.ftl +++ b/crates/jarvis-core/src/i18n/locales/en.ftl @@ -122,6 +122,15 @@ notification-error = Error notification-assistant-started = Assistant started notification-assistant-stopped = Assistant stopped +# SLOTS EXTRACTION +settings-slot-engine = Slot extraction +settings-slot-engine-desc = Extract parameters from voice commands (e.g. city name, number). +settings-gliner-model = GLiNER ONNX model +settings-gliner-model-desc = + Select model variant. + Smaller quantized models (int8, uint8) are faster but less accurate. +settings-gliner-models-hint = No GLiNER models found. + # ETC search-error-not-running = Assistant is not running search-error-failed = Failed to execute command diff --git a/crates/jarvis-core/src/i18n/locales/ru.ftl b/crates/jarvis-core/src/i18n/locales/ru.ftl index a3c43c7..0c16a63 100644 --- a/crates/jarvis-core/src/i18n/locales/ru.ftl +++ b/crates/jarvis-core/src/i18n/locales/ru.ftl @@ -122,6 +122,15 @@ notification-error = Ошибка notification-assistant-started = Ассистент запущен notification-assistant-stopped = Ассистент остановлен +# SLOTS EXTRACTION +settings-slot-engine = Извлечение параметров +settings-slot-engine-desc = Извлекает параметры из голосовых команд (напр. название города, число). +settings-gliner-model = Модель GLiNER ONNX +settings-gliner-model-desc = + Выберите вариант модели. + Квантизированные модели (int8, uint8) быстрее, но менее точны. +settings-gliner-models-hint = Модели GLiNER не найдены. + # ETC search-error-not-running = Ассистент не запущен search-error-failed = Не удалось выполнить команду diff --git a/crates/jarvis-core/src/i18n/locales/ua.ftl b/crates/jarvis-core/src/i18n/locales/ua.ftl index a4054ba..658946c 100644 --- a/crates/jarvis-core/src/i18n/locales/ua.ftl +++ b/crates/jarvis-core/src/i18n/locales/ua.ftl @@ -122,8 +122,16 @@ notification-error = Помилка notification-assistant-started = Асистент запущено notification-assistant-stopped = Асистент зупинено -# ETC +# SLOTS EXTRACTION +settings-slot-engine = Витяг параметрів +settings-slot-engine-desc = Витягує параметри з голосових команд (напр. назва міста, число). +settings-gliner-model = Модель GLiNER ONNX +settings-gliner-model-desc = + Оберіть варіант моделі. + Квантизовані моделі (int8, uint8) швидші, але менш точні. +settings-gliner-models-hint = Моделі GLiNER не знайдено. +# ETC search-error-not-running = Асистент не запущено search-error-failed = Не вдалося виконати команду settings-no-voices = Голоси не знайдено \ No newline at end of file diff --git a/crates/jarvis-core/src/intent/embeddingclassifier.rs b/crates/jarvis-core/src/intent/embeddingclassifier.rs index a4f49c7..b363e03 100644 --- a/crates/jarvis-core/src/intent/embeddingclassifier.rs +++ b/crates/jarvis-core/src/intent/embeddingclassifier.rs @@ -40,10 +40,12 @@ pub fn init(commands: &[JCommandsList]) -> Result<(), String> { match i18n::get_language().as_str() { "en" => { // smaller model for English + info!("Loading all-MiniLM-L6-v2 ..."); model_dir = APP_DIR.join("resources").join("models").join("all-MiniLM-L6-v2"); }, _ => { // bigger model for any other languages (multilingual) + info!("Loading paraphrase-multilingual-MiniLM-L12-v2-onnx-Q ..."); model_dir = APP_DIR.join("resources").join("models").join("paraphrase-multilingual-MiniLM-L12-v2-onnx-Q"); } } diff --git a/crates/jarvis-core/src/lib.rs b/crates/jarvis-core/src/lib.rs index 37cd78e..4151e55 100644 --- a/crates/jarvis-core/src/lib.rs +++ b/crates/jarvis-core/src/lib.rs @@ -26,7 +26,11 @@ pub mod stt; #[cfg(feature = "intent")] pub mod intent; +#[cfg(feature = "jarvis_app")] +pub mod slots; + pub mod vosk_models; +pub mod gliner_models; #[cfg(feature = "jarvis_app")] pub mod audio_processing; diff --git a/crates/jarvis-core/src/lua/api/context.rs b/crates/jarvis-core/src/lua/api/context.rs index c74bb45..2d75b8f 100644 --- a/crates/jarvis-core/src/lua/api/context.rs +++ b/crates/jarvis-core/src/lua/api/context.rs @@ -3,6 +3,8 @@ use mlua::{Lua, Table}; use crate::lua::{CommandContext}; +use crate::commands::SlotValue; + pub fn register(lua: &Lua, jarvis: &Table, ctx: &CommandContext) -> mlua::Result<()> { let context = lua.create_table()?; @@ -25,6 +27,18 @@ pub fn register(lua: &Lua, jarvis: &Table, ctx: &CommandContext) -> mlua::Result time.set("timestamp", now.timestamp())?; context.set("time", time)?; + // slots + let slots_table = lua.create_table()?; + if let Some(ref slots) = ctx.slots { + for (name, value) in slots { + match value { + SlotValue::Text(t) => slots_table.set(name.as_str(), t.as_str())?, + SlotValue::Number(n) => slots_table.set(name.as_str(), *n)?, + } + } + } + context.set("slots", slots_table)?; + jarvis.set("context", context)?; Ok(()) diff --git a/crates/jarvis-core/src/lua/structs.rs b/crates/jarvis-core/src/lua/structs.rs index 3cc9f11..1e27097 100644 --- a/crates/jarvis-core/src/lua/structs.rs +++ b/crates/jarvis-core/src/lua/structs.rs @@ -1,4 +1,6 @@ -use std::path::PathBuf; +use std::{collections::HashMap, path::PathBuf}; + +use crate::commands::SlotValue; // Context passed to Lua scripts #[derive(Debug, Clone)] @@ -14,6 +16,9 @@ pub struct CommandContext { // Current language pub language: String, + + // Slots + pub slots: Option> } // Result returned from Lua script execution diff --git a/crates/jarvis-core/src/lua/tests.rs b/crates/jarvis-core/src/lua/tests.rs index e558a69..f2035ee 100644 --- a/crates/jarvis-core/src/lua/tests.rs +++ b/crates/jarvis-core/src/lua/tests.rs @@ -13,6 +13,7 @@ mod tests { command_id: "test_cmd".to_string(), command_path: cmd_path, language: "en".to_string(), + slots: None, } } diff --git a/crates/jarvis-core/src/slots.rs b/crates/jarvis-core/src/slots.rs new file mode 100644 index 0000000..f490ad1 --- /dev/null +++ b/crates/jarvis-core/src/slots.rs @@ -0,0 +1,58 @@ +mod gliner; + +use std::collections::HashMap; +use once_cell::sync::OnceCell; + +use crate::commands::{SlotDefinition, SlotValue}; +use crate::config::structs::SlotExtractionEngine; +use crate::DB; + +static SLOT_ENGINE: OnceCell = OnceCell::new(); + +pub fn init() -> Result<(), String> { + if SLOT_ENGINE.get().is_some() { + return Ok(()); + } + + let engine = DB.get() + .map(|db| db.read().slot_extraction_engine) + .unwrap_or(SlotExtractionEngine::None); + + SLOT_ENGINE.set(engine).map_err(|_| "Slot engine already set")?; + + match engine { + SlotExtractionEngine::None => { + info!("Slot extraction disabled"); + } + SlotExtractionEngine::GLiNER => { + info!("Initializing GLiNER slot extraction backend."); + gliner::init()?; + info!("GLiNER slot extraction backend initialized."); + } + } + + Ok(()) +} + +// Extract slot values from text using the configured engine +pub fn extract( + text: &str, + slots: &HashMap, +) -> HashMap { + if slots.is_empty() { + return HashMap::new(); + } + + match SLOT_ENGINE.get().unwrap_or(&SlotExtractionEngine::None) { + SlotExtractionEngine::None => HashMap::new(), + SlotExtractionEngine::GLiNER => { + match gliner::extract(text, slots) { + Ok(result) => result, + Err(e) => { + error!("GLiNER slot extraction failed: {}", e); + HashMap::new() + } + } + } + } +} \ No newline at end of file diff --git a/crates/jarvis-core/src/slots/gliner.rs b/crates/jarvis-core/src/slots/gliner.rs new file mode 100644 index 0000000..d9a54c6 --- /dev/null +++ b/crates/jarvis-core/src/slots/gliner.rs @@ -0,0 +1,487 @@ +// BASED ON: gline-rs crate source code +// https://github.com/fbilhaut/gline-rs + +use std::collections::HashMap; +use std::path::PathBuf; +use once_cell::sync::OnceCell; +use parking_lot::Mutex; +use ndarray::Array; +use regex::Regex; +use tokenizers::Tokenizer; +use ort::value::Tensor; + +pub mod structs; +use structs::GlinerModelInfo; + +use std::fs; + +use crate::commands::{SlotDefinition, SlotValue}; +use crate::{APP_DIR, i18n}; + +// MODEL STATE + +struct GlinerModel { + session: ort::session::Session, + tokenizer: Tokenizer, + splitter: Regex, +} + +unsafe impl Send for GlinerModel {} +unsafe impl Sync for GlinerModel {} + +static MODEL: OnceCell> = OnceCell::new(); + +// GLiNER defaults (same as gline-rs Parameters::default()) +const THRESHOLD: f32 = 0.3; +const MAX_WIDTH: usize = 12; +const MAX_LENGTH: usize = 512; + +// applied after decoding +const MIN_CONFIDENCE: f32 = 0.4; + +// word splitting regex (gline-rs RegexSplitter default) +const WORD_REGEX: &str = r"\w+(?:[-_]\w+)*|\S"; + +// INIT + +pub fn init() -> Result<(), String> { + if MODEL.get().is_some() { + return Ok(()); + } + + let variant = crate::DB.get() + .map(|db| db.read().gliner_model.clone()) + .unwrap_or_default(); + + let language = i18n::get_language(); + + let (model_dir, onnx_file) = if variant.is_empty() { + (select_model_dir(), "model.onnx".to_string()) + } else { + crate::gliner_models::resolve_model(&variant, &language) + .unwrap_or_else(|| (select_model_dir(), "model.onnx".to_string())) + }; + + let model_path = model_dir.join("onnx").join(&onnx_file); + let tokenizer_path = model_dir.join("tokenizer.json"); + + info!("Loading GLiNER model from: {}, variant {}", model_dir.display(), variant); + + let session = ort::session::Session::builder() + .map_err(|e| format!("Failed to create ort session builder: {}", e))? + .commit_from_file(&model_path) + .map_err(|e| format!("Failed to load ONNX model: {}", e))?; + + let tokenizer = Tokenizer::from_file(&tokenizer_path) + .map_err(|e| format!("Failed to load tokenizer: {}", e))?; + + let splitter = Regex::new(WORD_REGEX) + .map_err(|e| format!("Failed to compile word regex: {}", e))?; + + MODEL.set(Mutex::new(GlinerModel { session, tokenizer, splitter })) + .map_err(|_| "GLiNER model already initialized".to_string())?; + + info!("GLiNER model loaded"); + Ok(()) +} + +fn select_model_dir() -> PathBuf { + let base = APP_DIR.join("resources").join("models"); + + match i18n::get_language().as_str() { + "en" => { + let path = base.join("gliner_small-v2.1"); + if path.exists() { return path; } + } + _ => {} + } + + // multilingual (covers RU, UA, EN) + let multi = base.join("gliner_multi-v2.1"); + if multi.exists() { return multi; } + + // fallback + base.join("gliner_small-v2.1") +} + +// WORD SPLITTING + +struct WordToken { + start: usize, + end: usize, + text: String, +} + +fn split_words(splitter: &Regex, text: &str, limit: Option) -> Vec { + let mut tokens = Vec::new(); + for m in splitter.find_iter(text) { + tokens.push(WordToken { + start: m.start(), + end: m.end(), + text: m.as_str().to_string(), + }); + if let Some(lim) = limit { + if tokens.len() >= lim { break; } + } + } + tokens +} + +// PROMPT CONSTRUCTION +// +// GLiNER prompt format: +// [<>, label1_w1, label1_w2, <>, label2_w1, ..., <>, word1, word2, ..., wordN] + +fn build_prompt(entities: &[&str], words: &[WordToken]) -> (Vec, usize) { + let mut prompt = Vec::with_capacity(entities.len() * 2 + 1 + words.len()); + + for entity in entities { + prompt.push("<>".to_string()); + prompt.push(entity.to_string()); // whole string, no split + } + prompt.push("<>".to_string()); + + let entities_len = prompt.len(); + + for w in words { + prompt.push(w.text.clone()); + } + + (prompt, entities_len) +} + +// ENCODING + +struct EncodedBatch { + input_ids: ndarray::Array2, + attention_masks: ndarray::Array2, + word_masks: ndarray::Array2, + text_lengths: ndarray::Array2, + num_words: usize, +} + +fn encode_single( + tokenizer: &Tokenizer, + _text: &str, + entities: &[&str], + words: &[WordToken], +) -> Result { + let (prompt, ent_len) = build_prompt(entities, words); + let text_word_count = words.len(); + + let mut word_encodings: Vec> = Vec::with_capacity(prompt.len()); + let mut total_tokens: usize = 2; // BOS + EOS + let mut entity_tokens: usize = 0; + + for (pos, word) in prompt.iter().enumerate() { + let encoding = tokenizer.encode(word.as_str(), false) + .map_err(|e| format!("Tokenizer encode error: {}", e))?; + let ids = encoding.get_ids().to_vec(); + total_tokens += ids.len(); + if pos < ent_len { + entity_tokens += ids.len(); + } + word_encodings.push(ids); + } + + // text_offset: index where text tokens start (after BOS + entity tokens) + let text_offset = entity_tokens + 1; + + // DEBUG + debug!("GLiNER prompt ({} total, ent_len={}, text_offset={}):", prompt.len(), ent_len, text_offset); + for (i, (word, enc)) in prompt.iter().zip(word_encodings.iter()).enumerate() { + debug!(" [{}]{} '{}' -> {:?}", i, if i < ent_len { " ENT" } else { " TXT" }, word, enc); + } + + let mut input_ids = Array::zeros((1, total_tokens)); + let mut attention_masks = Array::zeros((1, total_tokens)); + let mut word_masks = Array::zeros((1, total_tokens)); + + let mut idx: usize = 0; + let mut word_id: i64 = 0; + + // BOS + input_ids[[0, idx]] = 1; + attention_masks[[0, idx]] = 1; + idx += 1; + + // encode each word - matching gline-rs idx-based logic exactly + for word_enc in word_encodings.iter() { + for (token_idx, &token_id) in word_enc.iter().enumerate() { + input_ids[[0, idx]] = token_id as i64; + attention_masks[[0, idx]] = 1; + // word mask: only for text tokens (past text_offset), first sub-token only + if idx >= text_offset && token_idx == 0 { + word_masks[[0, idx]] = word_id; + } + idx += 1; + } + // increment word_id for any word whose tokens end past text_offset + if idx >= text_offset { + word_id += 1; + } + } + + // EOS + input_ids[[0, idx]] = 2; + attention_masks[[0, idx]] = 1; + + let mut text_lengths = Array::zeros((1, 1)); + text_lengths[[0, 0]] = (text_word_count + 1) as i64; + + debug!("GLiNER input_ids: {:?}", input_ids.as_slice().unwrap()); + debug!("GLiNER word_masks: {:?}", word_masks.as_slice().unwrap()); + debug!("GLiNER text_lengths: {}", text_word_count); + + Ok(EncodedBatch { + input_ids, + attention_masks, + word_masks, + text_lengths, + num_words: text_word_count + 1, + }) +} + +// SPAN TENSORS + +fn make_span_tensors(num_words: usize, max_width: usize) -> (ndarray::Array3, ndarray::Array2) { + let num_spans = num_words * max_width; + + let mut span_idx = Array::zeros((1, num_spans, 2)); + let mut span_mask = Array::from_elem((1, num_spans), false); + + for start in 0..num_words { + let remaining = num_words - start; + let actual_max = max_width.min(remaining); + for width in 0..actual_max { + let dim = start * max_width + width; + span_idx[[0, dim, 0]] = start as i64; + span_idx[[0, dim, 1]] = (start + width) as i64; + span_mask[[0, dim]] = true; + } + } + + (span_idx, span_mask) +} + +// DECODE + GREEDY SEARCH + +fn sigmoid(x: f32) -> f32 { + 1.0 / (1.0 + (-x).exp()) +} + +struct Entity { + text: String, + label: String, + prob: f32, + start: usize, + end: usize, +} + +fn decode_and_search( + logits_data: &[f32], + logits_shape: &[usize], + words: &[WordToken], + text: &str, + entities: &[&str], + max_width: usize, + threshold: f32, +) -> Vec { + let num_tokens = words.len(); + + let dim_mw = logits_shape.get(2).copied().unwrap_or(0); + let dim_e = logits_shape.get(3).copied().unwrap_or(0); + + let mut spans: Vec = Vec::new(); + + for start in 1..=num_tokens { + let max_end = (start + max_width).min(num_tokens + 1); + for end in start..max_end { + let width = end - start; + for (class_idx, &entity_label) in entities.iter().enumerate() { + let flat_idx = start * dim_mw * dim_e + width * dim_e + class_idx; + if flat_idx >= logits_data.len() { continue; } + + let raw_score = logits_data[flat_idx]; + let prob = sigmoid(raw_score); + if prob >= threshold { + let w_start = start - 1; + let w_end = end - 1; + let start_offset = words[w_start].start; + let end_offset = words[w_end].end; + let span_text = text[start_offset..end_offset].to_string(); + spans.push(Entity { + text: span_text, + label: entity_label.to_string(), + prob, + start: start_offset, + end: end_offset, + }); + } + } + } + } + + spans.sort_unstable_by(|a, b| (a.start, a.end).cmp(&(b.start, b.end))); + greedy_flat(&spans) +} + +fn greedy_flat(spans: &[Entity]) -> Vec { + if spans.is_empty() { + return Vec::new(); + } + + let mut result: Vec = Vec::new(); + let mut prev = 0usize; + let mut next = 1usize; + + while next < spans.len() { + let p = &spans[prev]; + let n = &spans[next]; + + if n.start >= p.end || p.start >= n.end { + result.push(Entity { + text: p.text.clone(), + label: p.label.clone(), + prob: p.prob, + start: p.start, + end: p.end, + }); + prev = next; + } else if p.prob < n.prob { + prev = next; + } + next += 1; + } + + let last = &spans[prev]; + result.push(Entity { + text: last.text.clone(), + label: last.label.clone(), + prob: last.prob, + start: last.start, + end: last.end, + }); + + result +} + +// PUBLIC API + +pub fn extract( + text: &str, + slots: &HashMap, +) -> Result, String> { + let mut model = MODEL.get().ok_or("GLiNER not initialized")?.lock(); + + let mut label_to_slots: HashMap<&str, Vec<&str>> = HashMap::new(); + for (slot_name, def) in slots { + if !def.entity.is_empty() { + label_to_slots + .entry(def.entity.as_str()) + .or_default() + .push(slot_name.as_str()); + } + } + + if label_to_slots.is_empty() { + return Ok(HashMap::new()); + } + + let labels: Vec<&str> = label_to_slots.keys().copied().collect(); + + debug!("GLiNER extract: text='{}', labels={:?}", text, labels); + + let words = split_words(&model.splitter, text, Some(MAX_LENGTH)); + if words.is_empty() { + return Ok(HashMap::new()); + } + + let encoded = encode_single(&model.tokenizer, text, &labels, &words)?; + + let (span_idx, span_mask) = make_span_tensors(encoded.num_words, MAX_WIDTH); + + let t_input_ids = Tensor::from_array(encoded.input_ids).map_err(|e| format!("tensor: {}", e))?; + let t_attn = Tensor::from_array(encoded.attention_masks).map_err(|e| format!("tensor: {}", e))?; + let t_words = Tensor::from_array(encoded.word_masks).map_err(|e| format!("tensor: {}", e))?; + let t_lengths = Tensor::from_array(encoded.text_lengths).map_err(|e| format!("tensor: {}", e))?; + let t_span_idx = Tensor::from_array(span_idx).map_err(|e| format!("tensor: {}", e))?; + let t_span_mask = Tensor::from_array(span_mask).map_err(|e| format!("tensor: {}", e))?; + + let outputs = model.session.run( + ort::inputs! { + "input_ids" => t_input_ids, + "attention_mask" => t_attn, + "words_mask" => t_words, + "text_lengths" => t_lengths, + "span_idx" => t_span_idx, + "span_mask" => t_span_mask, + } + ).map_err(|e| format!("ort inference error: {}", e))?; + + let (shape, logits_data) = outputs["logits"] + .try_extract_tensor::() + .map_err(|e| format!("Failed to extract logits: {}", e))?; + + let logits_shape: Vec = shape.iter().map(|&d| d as usize).collect(); + + debug!("GLiNER logits shape: {:?}, data len: {}", logits_shape, logits_data.len()); + let max_logit = logits_data.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + debug!("GLiNER max logit: {:.4}, sigmoid: {:.4}", max_logit, sigmoid(max_logit)); + + // dump all scores above 5% + let num_words = logits_shape.get(1).copied().unwrap_or(0); + let dim_mw = logits_shape.get(2).copied().unwrap_or(0); + let dim_e = logits_shape.get(3).copied().unwrap_or(0); + for start in 0..num_words { + for width in 0..dim_mw.min(num_words - start) { + for class_idx in 0..dim_e { + let flat_idx = start * dim_mw * dim_e + width * dim_e + class_idx; + if flat_idx < logits_data.len() { + let score = logits_data[flat_idx]; + let prob = sigmoid(score); + if prob > 0.05 { + let end = start + width; + let w_start = if start < words.len() { &words[start].text } else { "?" }; + let w_end = if end < words.len() { &words[end].text } else { "?" }; + debug!(" span[{}..{}] '{}'->'{}' label={} score={:.3} prob={:.3}", + start, end, w_start, w_end, labels.get(class_idx).unwrap_or(&"?"), score, prob); + } + } + } + } + } + + let entities = decode_and_search( + logits_data, &logits_shape, &words, text, &labels, MAX_WIDTH, THRESHOLD, + ); + + let mut result = HashMap::new(); + + for entity in &entities { + if entity.prob < MIN_CONFIDENCE { + continue; + } + + debug!("GLiNER entity: '{}' -> '{}' ({:.1}%)", + entity.text, entity.label, entity.prob * 100.0); + + if let Some(slot_names) = label_to_slots.get(entity.label.as_str()) { + for &slot_name in slot_names { + if !result.contains_key(slot_name) { + let value = parse_slot_value(&entity.text); + result.insert(slot_name.to_string(), value); + } + } + } + } + + Ok(result) +} + +fn parse_slot_value(text: &str) -> SlotValue { + if let Ok(n) = text.parse::() { + return SlotValue::Number(n); + } + SlotValue::Text(text.to_string()) +} \ No newline at end of file diff --git a/crates/jarvis-core/src/slots/gliner/structs.rs b/crates/jarvis-core/src/slots/gliner/structs.rs new file mode 100644 index 0000000..493ebb0 --- /dev/null +++ b/crates/jarvis-core/src/slots/gliner/structs.rs @@ -0,0 +1,7 @@ +#[derive(Debug, Clone)] +pub struct GlinerModelInfo { + pub model_dir: String, + pub file_name: String, + pub display_name: String, + pub value: String, +} \ No newline at end of file diff --git a/crates/jarvis-core/src/stt/vosk.rs b/crates/jarvis-core/src/stt/vosk.rs index 9631917..1f1e878 100644 --- a/crates/jarvis-core/src/stt/vosk.rs +++ b/crates/jarvis-core/src/stt/vosk.rs @@ -163,10 +163,26 @@ fn get_configured_model_path() -> Result { } } - // auto-detect: use first available model + // auto-detect: prefer model matching current language let available = vosk_models::scan_vosk_models(); + let language = i18n::get_language(); + + // try language match first + let lang_code = match language.as_str() { + "ru" => "ru", + "en" => "us", // vosk uses "us" not "en" + "ua" => "uk", // vosk uses "uk" not "ua" + other => other, + }; + + if let Some(matched) = available.iter().find(|m| m.language == lang_code) { + info!("Auto-detected Vosk model for '{}': {}", language, matched.name); + return Ok(matched.path.clone()); + } + + // fallback to first available if let Some(first) = available.first() { - info!("Auto-detected Vosk model: {}", first.name); + info!("Auto-detected Vosk model (no language match): {}", first.name); return Ok(first.path.clone()); } diff --git a/crates/jarvis-core/src/vosk_models.rs b/crates/jarvis-core/src/vosk_models.rs index e80408f..6de4b49 100644 --- a/crates/jarvis-core/src/vosk_models.rs +++ b/crates/jarvis-core/src/vosk_models.rs @@ -18,7 +18,7 @@ pub fn scan_vosk_models() -> Vec { }; let mut models = Vec::new(); - info!("TESTTTTTTTTTTTTT: {}", models_dir.display()); + info!("VOSK MODELS DIR: {}", models_dir.display()); if !models_dir.exists() { warn!("Vosk models directory not found: {}", models_dir.display()); diff --git a/crates/jarvis-gui/src/main.rs b/crates/jarvis-gui/src/main.rs index 0d16504..a4a6387 100644 --- a/crates/jarvis-gui/src/main.rs +++ b/crates/jarvis-gui/src/main.rs @@ -85,6 +85,9 @@ fn main() { // vosk tauri_commands::list_vosk_models, + // gliner + tauri_commands::list_gliner_models, + // i18n tauri_commands::get_translations, tauri_commands::translate, diff --git a/crates/jarvis-gui/src/tauri_commands/db.rs b/crates/jarvis-gui/src/tauri_commands/db.rs index 0fd0073..02eabcb 100644 --- a/crates/jarvis-gui/src/tauri_commands/db.rs +++ b/crates/jarvis-gui/src/tauri_commands/db.rs @@ -10,6 +10,8 @@ pub fn db_read(state: tauri::State<'_, AppState>, key: &str) -> String { "assistant_voice" => settings.voice.clone(), "selected_wake_word_engine" => format!("{:?}", settings.wake_word_engine), "selected_intent_recognition_engine" => format!("{:?}", settings.intent_recognition_engine), + "selected_slot_extraction_engine" => format!("{:?}", settings.slot_extraction_engine), + "selected_gliner_model" => settings.gliner_model.clone(), "selected_vosk_model" => settings.vosk_model.clone(), "speech_to_text_engine" => format!("{:?}", settings.speech_to_text_engine), "noise_suppression" => format!("{:?}", settings.noise_suppression), @@ -54,6 +56,16 @@ pub fn db_write(state: tauri::State<'_, AppState>, key: &str, val: &str) -> bool _ => return false, } } + "selected_slot_extraction_engine" => { + match val.to_lowercase().as_str() { + "none" => settings.slot_extraction_engine = jarvis_core::config::structs::SlotExtractionEngine::None, + "gliner" => settings.slot_extraction_engine = jarvis_core::config::structs::SlotExtractionEngine::GLiNER, + _ => return false, + } + } + "selected_gliner_model" => { + settings.gliner_model = val.to_string(); + } "selected_vosk_model" => { settings.vosk_model = val.to_string(); } diff --git a/crates/jarvis-gui/src/tauri_commands/stt.rs b/crates/jarvis-gui/src/tauri_commands/stt.rs index f0ae9b5..7eac51b 100644 --- a/crates/jarvis-gui/src/tauri_commands/stt.rs +++ b/crates/jarvis-gui/src/tauri_commands/stt.rs @@ -1,4 +1,4 @@ -use jarvis_core::{vosk_models, DB}; +use jarvis_core::{vosk_models, gliner_models}; use serde::Serialize; #[derive(Serialize)] @@ -8,6 +8,12 @@ pub struct VoskModel { pub size: String, } +#[derive(Serialize)] +pub struct GlinerVariant { + pub display_name: String, + pub value: String, +} + #[tauri::command] pub fn list_vosk_models() -> Vec { vosk_models::scan_vosk_models() @@ -18,4 +24,15 @@ pub fn list_vosk_models() -> Vec { size: m.size, }) .collect() -} \ No newline at end of file +} + +#[tauri::command] +pub fn list_gliner_models() -> Vec { + gliner_models::scan_gliner_variants() + .into_iter() + .map(|m| GlinerVariant { + display_name: m.display_name, + value: m.value, + }) + .collect() +} diff --git a/frontend/src/routes/settings/index.svelte b/frontend/src/routes/settings/index.svelte index 648f358..ab8af45 100644 --- a/frontend/src/routes/settings/index.svelte +++ b/frontend/src/routes/settings/index.svelte @@ -67,6 +67,7 @@ let availableMicrophones: MicrophoneOption[] = [] let availableVoskModels: { label: string; value: string }[] = [] + let availableGlinerModels: { label: string; value: string }[] = [] let settingsSaved = false let saveButtonDisabled = false @@ -75,6 +76,8 @@ let selectedMicrophone = "" let selectedWakeWordEngine = "" let selectedIntentRecognitionEngine = "" + let selectedSlotExtractionEngine = "" + let selectedGlinerModel = "" let selectedVoskModel = "" let selectedNoiseSuppression = "" let selectedVad = "" @@ -105,6 +108,8 @@ invoke("db_write", { key: "selected_microphone", val: selectedMicrophone }), invoke("db_write", { key: "selected_wake_word_engine", val: selectedWakeWordEngine }), invoke("db_write", { key: "selected_intent_recognition_engine", val: selectedIntentRecognitionEngine }), + invoke("db_write", { key: "selected_slot_extraction_engine", val: selectedSlotExtractionEngine }), + invoke("db_write", { key: "selected_gliner_model", val: selectedGlinerModel }), invoke("db_write", { key: "selected_vosk_model", val: selectedVoskModel }), invoke("db_write", { key: "noise_suppression", val: selectedNoiseSuppression }), @@ -173,13 +178,22 @@ value: m.name })) + // load gliner models + const glinerModels = await invoke<{ display_name: string; value: string }[]>("list_gliner_models") + availableGlinerModels = glinerModels.map(m => ({ + label: m.display_name, + value: m.value, + })) + // load settings from db - const [mic, wakeWord, intentReco, voskModel, + const [mic, wakeWord, intentReco, slotEngine, glinerModel, voskModel, noiseSuppression, vad, gainNormalizer, pico, openai] = await Promise.all([ invoke("db_read", { key: "selected_microphone" }), invoke("db_read", { key: "selected_wake_word_engine" }), invoke("db_read", { key: "selected_intent_recognition_engine" }), + invoke("db_read", { key: "selected_slot_extraction_engine" }), + invoke("db_read", { key: "selected_gliner_model" }), invoke("db_read", { key: "selected_vosk_model" }), invoke("db_read", { key: "noise_suppression" }), @@ -193,7 +207,9 @@ selectedMicrophone = mic selectedWakeWordEngine = wakeWord selectedIntentRecognitionEngine = intentReco + selectedSlotExtractionEngine = slotEngine selectedVoskModel = voskModel + selectedGlinerModel = glinerModel selectedNoiseSuppression = noiseSuppression selectedVad = vad gainNormalizerEnabled = gainNormalizer === "true" @@ -368,7 +384,43 @@ /> + + {#if selectedSlotExtractionEngine === "GLiNER"} + + {#key availableGlinerModels} + + {/key} + + {#if availableGlinerModels.length === 0} + + + + {t('settings-gliner-models-hint')} + + + {/if} + {/if} + + os.path.getmtime(dst): + return True + return False + + +def sync_directory(src_dir, dst_dir): + """sync dst_dir to match src_dir: copy new/changed, remove orphans""" + copied = 0 + updated = 0 + removed = 0 + + # walk source, copy new/changed files + for root, dirs, files in os.walk(src_dir): + rel_root = os.path.relpath(root, src_dir) + dst_root = os.path.join(dst_dir, rel_root) if rel_root != "." else dst_dir + + # ensure dir exists + os.makedirs(dst_root, exist_ok=True) + + for f in files: + src_file = os.path.join(root, f) + dst_file = os.path.join(dst_root, f) + + if not os.path.exists(dst_file): + shutil.copy2(src_file, dst_file) + copied += 1 + elif files_differ(src_file, dst_file): + shutil.copy2(src_file, dst_file) + updated += 1 + + # walk destination, remove files/dirs not in source + for root, dirs, files in os.walk(dst_dir, topdown=False): + rel_root = os.path.relpath(root, dst_dir) + src_root = os.path.join(src_dir, rel_root) if rel_root != "." else src_dir + + for f in files: + dst_file = os.path.join(root, f) + src_file = os.path.join(src_root, f) + if not os.path.exists(src_file): + os.remove(dst_file) + print(f" [-] Removed orphan file: {os.path.relpath(dst_file, dst_dir)}") + removed += 1 + + for d in dirs: + dst_sub = os.path.join(root, d) + src_sub = os.path.join(src_root, d) + if not os.path.exists(src_sub): + shutil.rmtree(dst_sub) + print(f" [-] Removed orphan dir: {os.path.relpath(dst_sub, dst_dir)}") + removed += 1 + + return copied, updated, removed + + +def sync_file(src_path, dst_path): + """sync a single file, returns (copied, updated)""" + if not os.path.exists(dst_path): + os.makedirs(os.path.dirname(dst_path), exist_ok=True) + shutil.copy2(src_path, dst_path) + return 1, 0 + elif files_differ(src_path, dst_path): + shutil.copy2(src_path, dst_path) + return 0, 1 + return 0, 0 + for tdir in TARGET_DIRS: tdir = ABS_PATH + tdir @@ -47,7 +127,6 @@ for tdir in TARGET_DIRS: print("Skipping target, not a directory: ", tdir) continue - # copy lib files for entry in SOURCE: if isinstance(entry, tuple): src, dest_name = entry @@ -57,11 +136,22 @@ for tdir in TARGET_DIRS: src_path = ABS_PATH + src if os.path.isdir(src_path): - # copy the whole directory target_name = dest_name if dest_name else os.path.basename(src.rstrip('/')) full_target_dir_path = os.path.join(tdir, target_name) - - if os.path.isdir(full_target_dir_path): + + if sync_mode: + # sync: update changed, add new, remove orphans + if os.path.isdir(full_target_dir_path): + c, u, r = sync_directory(src_path, full_target_dir_path) + if c or u or r: + print(f"[~] Synced: {src} -> {target_name} (+{c} new, ~{u} updated, -{r} removed)") + else: + print(f"[=] Up to date: {src} -> {target_name}") + else: + shutil.copytree(src_path, full_target_dir_path) + print("[+] Directory copied: ", src, "->", target_name) + + elif os.path.isdir(full_target_dir_path): if force_overwrite: shutil.rmtree(full_target_dir_path) shutil.copytree(src_path, full_target_dir_path) @@ -73,11 +163,19 @@ for tdir in TARGET_DIRS: print("[+] Directory copied: ", src, "->", target_name) elif os.path.isfile(src_path): - # copy file target_name = dest_name if dest_name else os.path.basename(src) full_target_file_path = os.path.join(tdir, target_name) - - if os.path.isfile(full_target_file_path): + + if sync_mode: + c, u = sync_file(src_path, full_target_file_path) + if c: + print("[+] File copied: ", src, "->", target_name) + elif u: + print("[~] File updated: ", src, "->", target_name) + else: + print("[=] Up to date: ", src, "->", target_name) + + elif os.path.isfile(full_target_file_path): if force_overwrite: os.remove(full_target_file_path) shutil.copy(src_path, full_target_file_path) @@ -90,4 +188,4 @@ for tdir in TARGET_DIRS: else: print("[?] Unknown entity to copy: ", src) - print("Post compile build done.") + print("Post compile build done.") \ No newline at end of file diff --git a/resources/commands/hello/command.toml b/resources/commands/hello/command.toml deleted file mode 100644 index d3c9d70..0000000 --- a/resources/commands/hello/command.toml +++ /dev/null @@ -1,14 +0,0 @@ -[[commands]] -id = "hello" -type = "lua" -script = "script.lua" -sandbox = "minimal" -timeout = 5000 -phrases.ru = [ - "привет", - "здравствуй", -] -phrases.en = [ - "hello", - "hi", -] \ No newline at end of file diff --git a/resources/commands/hello/script.lua b/resources/commands/hello/script.lua deleted file mode 100644 index 57e17b8..0000000 --- a/resources/commands/hello/script.lua +++ /dev/null @@ -1,21 +0,0 @@ --- simple test hello command - -local lang = jarvis.context.language -local hour = tonumber(jarvis.context.time.hour) - --- determine greeting based on time -local greeting -if hour >= 5 and hour < 12 then - greeting = lang == "ru" and "Доброе утро" or "Good morning" -elseif hour >= 12 and hour < 17 then - greeting = lang == "ru" and "Добрый день" or "Good afternoon" -elseif hour >= 17 and hour < 22 then - greeting = lang == "ru" and "Добрый вечер" or "Good evening" -else - greeting = lang == "ru" and "Доброй ночи" or "Good night" -end - -jarvis.log("info", "Greeting user: " .. greeting) -jarvis.audio.play_reply() - -return { chain = true } \ No newline at end of file diff --git a/resources/commands/test_slots/command.toml b/resources/commands/test_slots/command.toml new file mode 100644 index 0000000..62aef0c --- /dev/null +++ b/resources/commands/test_slots/command.toml @@ -0,0 +1,14 @@ +[[commands]] +id = "test_greet_name" +type = "lua" +script = "greet.lua" + +[commands.phrases] +en = ["say hello to {name}", "greet {name}"] +ru = ["поздоровайся с {name}", "привет {name}"] + +[commands.slots.name] +entity = "person name" + +[commands.slots.city] +entity = "city name" \ No newline at end of file diff --git a/resources/commands/test_slots/greet.lua b/resources/commands/test_slots/greet.lua new file mode 100644 index 0000000..9751dc9 --- /dev/null +++ b/resources/commands/test_slots/greet.lua @@ -0,0 +1,2 @@ +local name = jarvis.context.slots.name or "[UNKNOWN]" +print("Hello, " .. name .. "!") \ No newline at end of file diff --git a/resources/commands/weather/command.toml b/resources/commands/weather/command.toml index 3bfc7bf..268f3e5 100644 --- a/resources/commands/weather/command.toml +++ b/resources/commands/weather/command.toml @@ -6,19 +6,16 @@ timeout = 5000 sandbox = "standard" [commands.phrases] -ru = [ - "какая погода", - "погода" -] -en = [ - "what's the weather", - "weather" -] +ru = ["какая погода в {city}", "погода {city}"] +en = ["what's the weather in {city}", "weather in {city}"] [commands.sounds] ru = ["weather_ru_1", "weather_ru_2"] en = ["weather_en_1", "weather_en_2"] +[commands.slots.city] +entity = "city name" +context = ["in", "for", "at", "в", "для"] [[commands]]