mirror of
https://github.com/Priler/jarvis.git
synced 2026-05-26 07:08:11 +00:00
some fixes + gliner first implementation
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -50,3 +50,6 @@ tree.txt
|
||||
|
||||
# Ignore packaged crates
|
||||
*.crate
|
||||
|
||||
# Ignore AI models
|
||||
/resources/models/*
|
||||
|
||||
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -3305,12 +3305,15 @@ dependencies = [
|
||||
"kira",
|
||||
"log",
|
||||
"mlua",
|
||||
"ndarray",
|
||||
"nnnoiseless",
|
||||
"once_cell",
|
||||
"ort",
|
||||
"parking_lot",
|
||||
"platform-dirs",
|
||||
"pv_recorder",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"reqwest 0.13.1",
|
||||
"rodio",
|
||||
"rustpotter",
|
||||
@@ -3320,6 +3323,7 @@ dependencies = [
|
||||
"serde_yaml",
|
||||
"sha2",
|
||||
"tempfile",
|
||||
"tokenizers",
|
||||
"tokio",
|
||||
"tokio-tungstenite",
|
||||
"toml 0.9.11+spec-1.1.0",
|
||||
|
||||
@@ -45,4 +45,10 @@ mlua = { version = "0.11.5", features = ["lua55", "vendored", "async", "serde"]
|
||||
reqwest = { version = "0.13.1", features = ["blocking", "json"] }
|
||||
tempfile = "^3.24"
|
||||
winrt-notification = "0.5"
|
||||
fastembed = { version = "^5.8.1", default-features = false, features = ["ort-download-binaries"] }
|
||||
|
||||
fastembed = { version = "^5.8.1", default-features = false, features = ["ort-download-binaries"] }
|
||||
ort = { version = "=2.0.0-rc.11" }
|
||||
ndarray = "0.17"
|
||||
tokenizers = { version = "0.22", default-features = false }
|
||||
regex = "1"
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::sync::mpsc::Receiver;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use jarvis_core::{audio_buffer::AudioRingBuffer, audio_processing, commands, config, listener, recorder, stt, COMMANDS_LIST, intent, voices, ipc::{self, IpcEvent}, i18n};
|
||||
use jarvis_core::{audio_buffer::AudioRingBuffer, audio_processing, commands, config, listener, recorder, stt, COMMANDS_LIST, intent, voices, ipc::{self, IpcEvent}, i18n, slots};
|
||||
use rand::seq::SliceRandom;
|
||||
|
||||
use crate::should_stop;
|
||||
@@ -23,8 +23,8 @@ fn main_loop(text_cmd_rx: Receiver<String>) -> Result<(), ()> {
|
||||
let sample_rate: usize = 16000;
|
||||
let mut frame_buffer: Vec<i16> = vec![0; frame_length];
|
||||
|
||||
// ring buffer: keeps last 2 seconds of audio (pre-roll)
|
||||
let mut audio_buffer = AudioRingBuffer::new(2.0, frame_length, sample_rate);
|
||||
// ring buffer: keeps last 5 seconds of audio (pre-roll)
|
||||
let mut audio_buffer = AudioRingBuffer::new(5.0, frame_length, sample_rate);
|
||||
|
||||
// VAD state
|
||||
let mut vad_state = VadState::WaitingForVoice;
|
||||
@@ -162,8 +162,8 @@ fn recognize_command(
|
||||
let mut first_recognition = prefed_audio;
|
||||
|
||||
// longer silence threshold for commands (user might pause to think)
|
||||
// 2 seconds
|
||||
let silence_threshold: u32 = ((2.0 * sample_rate as f32) / frame_length as f32) as u32;
|
||||
// 5 seconds
|
||||
let silence_threshold: u32 = ((5.0 * sample_rate as f32) / frame_length as f32) as u32;
|
||||
|
||||
loop {
|
||||
if crate::should_stop() {
|
||||
@@ -262,6 +262,11 @@ fn recognize_command(
|
||||
|
||||
recognized_voice = recognized_voice.trim().to_string();
|
||||
|
||||
if recognized_voice.len() < 5 {
|
||||
debug!("Ignoring too short recognition: '{}'", recognized_voice);
|
||||
continue;
|
||||
}
|
||||
|
||||
if recognized_voice.is_empty() {
|
||||
continue;
|
||||
}
|
||||
@@ -360,7 +365,18 @@ fn execute_command(text: &str, rt: &tokio::runtime::Runtime) -> bool {
|
||||
if let Some((cmd_path, cmd_config)) = cmd_result {
|
||||
info!("Command found: {:?}", cmd_path);
|
||||
|
||||
match commands::execute_command(&cmd_path, &cmd_config, Some(&text)) {
|
||||
// extract slots if needed
|
||||
let extracted_slots = if !cmd_config.slots.is_empty() {
|
||||
let s = slots::extract(text, &cmd_config.slots);
|
||||
if !s.is_empty() {
|
||||
info!("Extracted slots: {:?}", s);
|
||||
}
|
||||
Some(s)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match commands::execute_command(&cmd_path, &cmd_config, Some(&text), extracted_slots.as_ref()) {
|
||||
Ok(chain) => {
|
||||
info!("Command executed successfully");
|
||||
// voices::play_ok();
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use jarvis_core::slots;
|
||||
use parking_lot::RwLock;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
@@ -105,6 +106,9 @@ fn main() -> Result<(), String> {
|
||||
}
|
||||
});
|
||||
|
||||
// init slots parsing engine
|
||||
slots::init().map_err(|e| error!("Slot extraction init failed: {}", e)).ok();
|
||||
|
||||
// init audio processing
|
||||
info!("Initializing audio processing...");
|
||||
if let Err(e) = audio_processing::init() {
|
||||
|
||||
@@ -43,13 +43,21 @@ reqwest = { workspace = true, optional = true }
|
||||
tempfile.workspace = true
|
||||
|
||||
fastembed = { workspace = true, optional = true }
|
||||
ort = { workspace = true, optional = true }
|
||||
ndarray = { workspace = true, optional = true }
|
||||
tokenizers = { workspace = true, optional = true }
|
||||
regex = { workspace = true, optional = true }
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winrt-notification = { workspace = true, optional = true }
|
||||
|
||||
[features]
|
||||
default = ["jarvis_app"]
|
||||
jarvis_app = ["vosk", "intent-classifier", "fastembed", "tokio", "nnnoiseless", "tokio-tungstenite", "futures-util", "lua"]
|
||||
jarvis_app = [
|
||||
"vosk", "intent-classifier", "fastembed", "tokio", "nnnoiseless", "tokio-tungstenite", "futures-util",
|
||||
"lua",
|
||||
"ort", "ndarray", "tokenizers", "regex",]
|
||||
|
||||
intent = ["intent-classifier", "tokio"]
|
||||
lua = ["mlua", "reqwest", "winrt-notification"]
|
||||
lua_only = ["lua", "tokio"]
|
||||
@@ -45,7 +45,9 @@ impl NnnoiselessNS {
|
||||
}
|
||||
|
||||
pub fn reset(&mut self) {
|
||||
self.state = DenoiseState::new();
|
||||
// self.state = DenoiseState::new();
|
||||
// self.buffer.clear();
|
||||
|
||||
self.buffer.clear();
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::fs;
|
||||
use std::time::Duration;
|
||||
@@ -191,7 +192,8 @@ pub fn execute_cli(cmd: &str, args: &[String]) -> std::io::Result<Child> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn execute_command(cmd_path: &PathBuf, cmd_config: &JCommand, phrase: Option<&str>) -> Result<bool, String> {
|
||||
pub fn execute_command(cmd_path: &PathBuf, cmd_config: &JCommand, phrase: Option<&str>, slots: Option<&HashMap<String, SlotValue>>) -> Result<bool, String> {
|
||||
// execute command by the type
|
||||
match cmd_config.cmd_type.as_str() {
|
||||
|
||||
// BRUH
|
||||
@@ -200,7 +202,7 @@ pub fn execute_command(cmd_path: &PathBuf, cmd_config: &JCommand, phrase: Option
|
||||
// LUA command
|
||||
#[cfg(feature = "lua")]
|
||||
"lua" => {
|
||||
execute_lua_command(cmd_path, cmd_config, phrase)
|
||||
execute_lua_command(cmd_path, cmd_config, phrase, slots)
|
||||
}
|
||||
|
||||
// AutoHotkey command
|
||||
@@ -254,8 +256,10 @@ fn execute_lua_command(
|
||||
cmd_path: &PathBuf,
|
||||
cmd_config: &JCommand,
|
||||
phrase: Option<&str>,
|
||||
slots: Option<&HashMap<String, SlotValue>>
|
||||
) -> Result<bool, String> {
|
||||
// get script path
|
||||
|
||||
let script_name = if cmd_config.script.is_empty() {
|
||||
"script.lua"
|
||||
} else {
|
||||
@@ -270,13 +274,14 @@ fn execute_lua_command(
|
||||
|
||||
// parse sandbox level
|
||||
let sandbox = SandboxLevel::from_str(&cmd_config.sandbox);
|
||||
|
||||
|
||||
// create context
|
||||
let context = CommandContext {
|
||||
phrase: phrase.unwrap_or("").to_string(),
|
||||
command_id: cmd_config.id.clone(),
|
||||
command_path: cmd_path.clone(),
|
||||
language: i18n::get_language(),
|
||||
slots: slots.map(|s| s.clone()),
|
||||
};
|
||||
|
||||
// get timeout
|
||||
|
||||
@@ -59,6 +59,9 @@ pub struct JCommand {
|
||||
#[serde(default)]
|
||||
pub phrases: HashMap<String, Vec<String>>,
|
||||
|
||||
// Slot definitions: slot_name -> how to extract it
|
||||
#[serde(default)]
|
||||
pub slots: HashMap<String, SlotDefinition>,
|
||||
|
||||
// CACHE
|
||||
#[serde(skip, default)]
|
||||
@@ -90,6 +93,8 @@ impl Clone for JCommand {
|
||||
sounds: self.sounds.clone(),
|
||||
phrases: self.phrases.clone(),
|
||||
|
||||
slots: self.slots.clone(),
|
||||
|
||||
// empty caches for cloned instance
|
||||
sounds_cache: RwLock::new(HashMap::new()),
|
||||
phrases_cache: RwLock::new(HashMap::new()),
|
||||
@@ -150,4 +155,25 @@ impl JCommand {
|
||||
// fallback to first available
|
||||
map.values().next().cloned().unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct SlotDefinition {
|
||||
// Entity label for GLiNER (e.g. "city name", "song title", "number")
|
||||
// This is a free-form description - GLiNER matches it semantically
|
||||
#[serde(default)]
|
||||
pub entity: String,
|
||||
|
||||
// Optional: fallback context words for template-based extraction
|
||||
// e.g. ["in", "for", "at"] for a city slot
|
||||
#[serde(default)]
|
||||
pub context: Vec<String>,
|
||||
}
|
||||
|
||||
// Extracted slot value passed to commands
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum SlotValue {
|
||||
Text(String),
|
||||
Number(f64),
|
||||
}
|
||||
@@ -18,6 +18,7 @@ use rustpotter::{
|
||||
};
|
||||
|
||||
use crate::IntentRecognitionEngine;
|
||||
use crate::SlotExtractionEngine;
|
||||
use crate::config::structs::NoiseSuppressionBackend;
|
||||
use crate::config::structs::VadBackend;
|
||||
use crate::{APP_CONFIG_DIR, APP_DIRS, APP_LOG_DIR};
|
||||
@@ -156,6 +157,9 @@ pub const VOSK_SPEECH_PARTIAL_WORDS: bool = false;
|
||||
// IRE (intents recognition)
|
||||
pub const INTENT_CLASSIFIER_MIN_CONFIDENCE: f64 = 0.75;
|
||||
|
||||
// SLOTS EXTRACTION
|
||||
pub const DEFAULT_SLOT_EXTRACTION_ENGINE: SlotExtractionEngine = SlotExtractionEngine::None;
|
||||
|
||||
// embedding classifier
|
||||
pub const EMBEDDING_MIN_CONFIDENCE: f64 = 0.70;
|
||||
|
||||
|
||||
@@ -45,6 +45,11 @@ pub enum AudioType {
|
||||
Kira,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, Debug, PartialEq)]
|
||||
pub enum SlotExtractionEngine {
|
||||
None,
|
||||
GLiNER,
|
||||
}
|
||||
|
||||
|
||||
impl fmt::Display for WakeWordEngine {
|
||||
@@ -77,6 +82,12 @@ impl fmt::Display for VadBackend {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SlotExtractionEngine {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{:?}", self)
|
||||
}
|
||||
}
|
||||
|
||||
// pub enum TextToSpeechEngine {}
|
||||
|
||||
// pub enum IntentRecognitionEngine {}
|
||||
|
||||
@@ -6,6 +6,7 @@ use crate::config::structs::WakeWordEngine;
|
||||
use crate::config::structs::IntentRecognitionEngine;
|
||||
use crate::config::structs::NoiseSuppressionBackend;
|
||||
use crate::config::structs::VadBackend;
|
||||
use crate::config::structs::SlotExtractionEngine;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct Settings {
|
||||
@@ -14,6 +15,10 @@ pub struct Settings {
|
||||
|
||||
pub wake_word_engine: WakeWordEngine,
|
||||
pub intent_recognition_engine: IntentRecognitionEngine,
|
||||
|
||||
pub slot_extraction_engine: SlotExtractionEngine,
|
||||
pub gliner_model: String,
|
||||
|
||||
pub speech_to_text_engine: SpeechToTextEngine,
|
||||
pub vosk_model: String,
|
||||
|
||||
@@ -35,6 +40,8 @@ impl Default for Settings {
|
||||
|
||||
wake_word_engine: config::DEFAULT_WAKE_WORD_ENGINE,
|
||||
intent_recognition_engine: config::DEFAULT_INTENT_RECOGNITION_ENGINE,
|
||||
slot_extraction_engine: SlotExtractionEngine::None,
|
||||
gliner_model: String::new(),
|
||||
speech_to_text_engine: config::DEFAULT_SPEECH_TO_TEXT_ENGINE,
|
||||
vosk_model: String::from(""), // auto detect first available
|
||||
|
||||
|
||||
130
crates/jarvis-core/src/gliner_models.rs
Normal file
130
crates/jarvis-core/src/gliner_models.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use crate::APP_DIR;
|
||||
|
||||
const GLINER_DIRS: &[&str] = &["gliner_small-v2.1", "gliner_multi-v2.1"];
|
||||
|
||||
/// One selectable GLiNER model variant, as produced by `scan_gliner_variants`.
#[derive(Debug, Clone)]
pub struct GlinerModelVariant {
    /// Type id stored in settings, e.g. "int8", "fp16", "full".
    pub value: String,
    /// Label shown in the dropdown: the variant name followed by the size
    /// summary in parentheses, e.g. "int8 (small: 174MB / multi: 332MB)".
    pub display_name: String,
}
|
||||
|
||||
// scan both model dirs and return a deduplicated list of model types
|
||||
pub fn scan_gliner_variants() -> Vec<GlinerModelVariant> {
|
||||
let base = APP_DIR.join("resources").join("models");
|
||||
|
||||
// collect: type -> { dir_name -> size_mb }
|
||||
let mut types: HashMap<String, HashMap<String, u64>> = HashMap::new();
|
||||
|
||||
for dir_name in GLINER_DIRS {
|
||||
let onnx_dir = base.join(dir_name).join("onnx");
|
||||
if !onnx_dir.exists() { continue; }
|
||||
|
||||
let entries = match fs::read_dir(&onnx_dir) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
let file_name = match path.file_name().and_then(|n| n.to_str()) {
|
||||
Some(n) if n.ends_with(".onnx") => n.to_string(),
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let variant_type = file_name_to_type(&file_name);
|
||||
let size_mb = fs::metadata(&path).map(|m| m.len()).unwrap_or(0) / (1024 * 1024);
|
||||
|
||||
types.entry(variant_type)
|
||||
.or_default()
|
||||
.insert(dir_name.to_string(), size_mb);
|
||||
}
|
||||
}
|
||||
|
||||
let mut result: Vec<GlinerModelVariant> = types.into_iter().map(|(variant, sizes)| {
|
||||
let size_str = build_size_string(&sizes);
|
||||
let label = if variant == "full" { "Full".to_string() } else { variant.clone() };
|
||||
GlinerModelVariant {
|
||||
display_name: format!("{} ({})", label, size_str),
|
||||
value: variant,
|
||||
}
|
||||
}).collect();
|
||||
|
||||
// sort: full first, then alphabetically
|
||||
result.sort_by(|a, b| {
|
||||
let a_full = a.value == "full";
|
||||
let b_full = b.value == "full";
|
||||
b_full.cmp(&a_full).then_with(|| a.value.cmp(&b.value))
|
||||
});
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// "model.onnx" -> "full", "model_int8.onnx" -> "int8"
|
||||
/// Maps an ONNX file name to the model type id stored in settings.
///
/// The ".onnx" suffix is dropped if present. A bare "model" becomes "full",
/// a "model_" prefix is removed, and any other stem is returned unchanged.
fn file_name_to_type(name: &str) -> String {
    let stem = match name.strip_suffix(".onnx") {
        Some(without_ext) => without_ext,
        None => name,
    };

    let variant = match stem {
        "model" => "full",
        other => other.strip_prefix("model_").unwrap_or(other),
    };

    variant.to_owned()
}
|
||||
|
||||
// build size display: "174MB" if only one dir, "small: 174MB / multi: 332MB" if both
|
||||
fn build_size_string(sizes: &HashMap<String, u64>) -> String {
|
||||
if sizes.len() == 1 {
|
||||
let (dir, mb) = sizes.iter().next().unwrap();
|
||||
let short = short_dir_name(dir);
|
||||
return format!("{}: {}MB", short, mb);
|
||||
}
|
||||
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
for dir_name in GLINER_DIRS {
|
||||
if let Some(mb) = sizes.get(*dir_name) {
|
||||
parts.push(format!("{}: {}MB", short_dir_name(dir_name), mb));
|
||||
}
|
||||
}
|
||||
parts.join(" / ")
|
||||
}
|
||||
|
||||
/// Shortens a model directory name for display: returns "small" or "multi" when
/// the name contains that word ("small" is checked first), otherwise the name
/// itself.
fn short_dir_name(dir: &str) -> &str {
    for tag in ["small", "multi"] {
        if dir.contains(tag) {
            return tag;
        }
    }
    dir
}
|
||||
|
||||
// resolve variant type + language into actual file path
|
||||
// returns (model_dir_path, onnx_file_name) or None
|
||||
pub fn resolve_model(variant: &str, language: &str) -> Option<(std::path::PathBuf, String)> {
|
||||
let base = APP_DIR.join("resources").join("models");
|
||||
let file_name = type_to_file_name(variant);
|
||||
|
||||
// pick dir based on language
|
||||
let preferred: &[&str] = match language {
|
||||
"en" => &["gliner_small-v2.1", "gliner_multi-v2.1"],
|
||||
_ => &["gliner_multi-v2.1", "gliner_small-v2.1"],
|
||||
};
|
||||
|
||||
for dir_name in preferred {
|
||||
let path = base.join(dir_name).join("onnx").join(&file_name);
|
||||
if path.exists() {
|
||||
return Some((base.join(dir_name), file_name));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
// "full" -> "model.onnx", "int8" -> "model_int8.onnx"
|
||||
/// Maps a model type id back to its ONNX file name.
///
/// "full" and the empty string both select the unsuffixed "model.onnx"; any
/// other id is inserted as "model_<id>.onnx".
fn type_to_file_name(variant: &str) -> String {
    match variant {
        "" | "full" => String::from("model.onnx"),
        other => format!("model_{}.onnx", other),
    }
}
|
||||
@@ -122,6 +122,15 @@ notification-error = Error
|
||||
notification-assistant-started = Assistant started
|
||||
notification-assistant-stopped = Assistant stopped
|
||||
|
||||
# SLOTS EXTRACTION
|
||||
settings-slot-engine = Slot extraction
|
||||
settings-slot-engine-desc = Extract parameters from voice commands (e.g. city name, number).
|
||||
settings-gliner-model = GLiNER ONNX model
|
||||
settings-gliner-model-desc =
|
||||
Select model variant.
|
||||
Smaller quantized models (int8, uint8) are faster but less accurate.
|
||||
settings-gliner-models-hint = No GLiNER models found.
|
||||
|
||||
# ETC
|
||||
search-error-not-running = Assistant is not running
|
||||
search-error-failed = Failed to execute command
|
||||
|
||||
@@ -122,6 +122,15 @@ notification-error = Ошибка
|
||||
notification-assistant-started = Ассистент запущен
|
||||
notification-assistant-stopped = Ассистент остановлен
|
||||
|
||||
# SLOTS EXTRACTION
|
||||
settings-slot-engine = Извлечение параметров
|
||||
settings-slot-engine-desc = Извлекает параметры из голосовых команд (напр. название города, число).
|
||||
settings-gliner-model = Модель GLiNER ONNX
|
||||
settings-gliner-model-desc =
|
||||
Выберите вариант модели.
|
||||
Квантизированные модели (int8, uint8) быстрее, но менее точны.
|
||||
settings-gliner-models-hint = Модели GLiNER не найдены.
|
||||
|
||||
# ETC
|
||||
search-error-not-running = Ассистент не запущен
|
||||
search-error-failed = Не удалось выполнить команду
|
||||
|
||||
@@ -122,8 +122,16 @@ notification-error = Помилка
|
||||
notification-assistant-started = Асистент запущено
|
||||
notification-assistant-stopped = Асистент зупинено
|
||||
|
||||
# ETC
|
||||
# SLOTS EXTRACTION
|
||||
settings-slot-engine = Витяг параметрів
|
||||
settings-slot-engine-desc = Витягує параметри з голосових команд (напр. назва міста, число).
|
||||
settings-gliner-model = Модель GLiNER ONNX
|
||||
settings-gliner-model-desc =
|
||||
Оберіть варіант моделі.
|
||||
Квантизовані моделі (int8, uint8) швидші, але менш точні.
|
||||
settings-gliner-models-hint = Моделі GLiNER не знайдено.
|
||||
|
||||
# ETC
|
||||
search-error-not-running = Асистент не запущено
|
||||
search-error-failed = Не вдалося виконати команду
|
||||
settings-no-voices = Голоси не знайдено
|
||||
@@ -40,10 +40,12 @@ pub fn init(commands: &[JCommandsList]) -> Result<(), String> {
|
||||
match i18n::get_language().as_str() {
|
||||
"en" => {
|
||||
// smaller model for English
|
||||
info!("Loading all-MiniLM-L6-v2 ...");
|
||||
model_dir = APP_DIR.join("resources").join("models").join("all-MiniLM-L6-v2");
|
||||
},
|
||||
_ => {
|
||||
// bigger model for any other languages (multilingual)
|
||||
info!("Loading paraphrase-multilingual-MiniLM-L12-v2-onnx-Q ...");
|
||||
model_dir = APP_DIR.join("resources").join("models").join("paraphrase-multilingual-MiniLM-L12-v2-onnx-Q");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,7 +26,11 @@ pub mod stt;
|
||||
#[cfg(feature = "intent")]
|
||||
pub mod intent;
|
||||
|
||||
#[cfg(feature = "jarvis_app")]
|
||||
pub mod slots;
|
||||
|
||||
pub mod vosk_models;
|
||||
pub mod gliner_models;
|
||||
|
||||
#[cfg(feature = "jarvis_app")]
|
||||
pub mod audio_processing;
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
use mlua::{Lua, Table};
|
||||
use crate::lua::{CommandContext};
|
||||
|
||||
use crate::commands::SlotValue;
|
||||
|
||||
pub fn register(lua: &Lua, jarvis: &Table, ctx: &CommandContext) -> mlua::Result<()> {
|
||||
let context = lua.create_table()?;
|
||||
|
||||
@@ -25,6 +27,18 @@ pub fn register(lua: &Lua, jarvis: &Table, ctx: &CommandContext) -> mlua::Result
|
||||
time.set("timestamp", now.timestamp())?;
|
||||
context.set("time", time)?;
|
||||
|
||||
// slots
|
||||
let slots_table = lua.create_table()?;
|
||||
if let Some(ref slots) = ctx.slots {
|
||||
for (name, value) in slots {
|
||||
match value {
|
||||
SlotValue::Text(t) => slots_table.set(name.as_str(), t.as_str())?,
|
||||
SlotValue::Number(n) => slots_table.set(name.as_str(), *n)?,
|
||||
}
|
||||
}
|
||||
}
|
||||
context.set("slots", slots_table)?;
|
||||
|
||||
jarvis.set("context", context)?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use std::path::PathBuf;
|
||||
use std::{collections::HashMap, path::PathBuf};
|
||||
|
||||
use crate::commands::SlotValue;
|
||||
|
||||
// Context passed to Lua scripts
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -14,6 +16,9 @@ pub struct CommandContext {
|
||||
|
||||
// Current language
|
||||
pub language: String,
|
||||
|
||||
// Slots
|
||||
pub slots: Option<HashMap<String, SlotValue>>
|
||||
}
|
||||
|
||||
// Result returned from Lua script execution
|
||||
|
||||
@@ -13,6 +13,7 @@ mod tests {
|
||||
command_id: "test_cmd".to_string(),
|
||||
command_path: cmd_path,
|
||||
language: "en".to_string(),
|
||||
slots: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
58
crates/jarvis-core/src/slots.rs
Normal file
58
crates/jarvis-core/src/slots.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
mod gliner;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
use crate::commands::{SlotDefinition, SlotValue};
|
||||
use crate::config::structs::SlotExtractionEngine;
|
||||
use crate::DB;
|
||||
|
||||
static SLOT_ENGINE: OnceCell<SlotExtractionEngine> = OnceCell::new();
|
||||
|
||||
pub fn init() -> Result<(), String> {
|
||||
if SLOT_ENGINE.get().is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let engine = DB.get()
|
||||
.map(|db| db.read().slot_extraction_engine)
|
||||
.unwrap_or(SlotExtractionEngine::None);
|
||||
|
||||
SLOT_ENGINE.set(engine).map_err(|_| "Slot engine already set")?;
|
||||
|
||||
match engine {
|
||||
SlotExtractionEngine::None => {
|
||||
info!("Slot extraction disabled");
|
||||
}
|
||||
SlotExtractionEngine::GLiNER => {
|
||||
info!("Initializing GLiNER slot extraction backend.");
|
||||
gliner::init()?;
|
||||
info!("GLiNER slot extraction backend initialized.");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Extract slot values from text using the configured engine
|
||||
pub fn extract(
|
||||
text: &str,
|
||||
slots: &HashMap<String, SlotDefinition>,
|
||||
) -> HashMap<String, SlotValue> {
|
||||
if slots.is_empty() {
|
||||
return HashMap::new();
|
||||
}
|
||||
|
||||
match SLOT_ENGINE.get().unwrap_or(&SlotExtractionEngine::None) {
|
||||
SlotExtractionEngine::None => HashMap::new(),
|
||||
SlotExtractionEngine::GLiNER => {
|
||||
match gliner::extract(text, slots) {
|
||||
Ok(result) => result,
|
||||
Err(e) => {
|
||||
error!("GLiNER slot extraction failed: {}", e);
|
||||
HashMap::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
487
crates/jarvis-core/src/slots/gliner.rs
Normal file
487
crates/jarvis-core/src/slots/gliner.rs
Normal file
@@ -0,0 +1,487 @@
|
||||
// BASED ON: gline-rs crate source code
|
||||
// https://github.com/fbilhaut/gline-rs
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use once_cell::sync::OnceCell;
|
||||
use parking_lot::Mutex;
|
||||
use ndarray::Array;
|
||||
use regex::Regex;
|
||||
use tokenizers::Tokenizer;
|
||||
use ort::value::Tensor;
|
||||
|
||||
pub mod structs;
|
||||
use structs::GlinerModelInfo;
|
||||
|
||||
use std::fs;
|
||||
|
||||
use crate::commands::{SlotDefinition, SlotValue};
|
||||
use crate::{APP_DIR, i18n};
|
||||
|
||||
// MODEL STATE
|
||||
|
||||
struct GlinerModel {
|
||||
session: ort::session::Session,
|
||||
tokenizer: Tokenizer,
|
||||
splitter: Regex,
|
||||
}
|
||||
|
||||
unsafe impl Send for GlinerModel {}
|
||||
unsafe impl Sync for GlinerModel {}
|
||||
|
||||
static MODEL: OnceCell<Mutex<GlinerModel>> = OnceCell::new();
|
||||
|
||||
// GLiNER defaults (same as gline-rs Parameters::default())
|
||||
const THRESHOLD: f32 = 0.3;
|
||||
const MAX_WIDTH: usize = 12;
|
||||
const MAX_LENGTH: usize = 512;
|
||||
|
||||
// applied after decoding
|
||||
const MIN_CONFIDENCE: f32 = 0.4;
|
||||
|
||||
// word splitting regex (gline-rs RegexSplitter default)
|
||||
const WORD_REGEX: &str = r"\w+(?:[-_]\w+)*|\S";
|
||||
|
||||
// INIT
|
||||
|
||||
pub fn init() -> Result<(), String> {
|
||||
if MODEL.get().is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let variant = crate::DB.get()
|
||||
.map(|db| db.read().gliner_model.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
let language = i18n::get_language();
|
||||
|
||||
let (model_dir, onnx_file) = if variant.is_empty() {
|
||||
(select_model_dir(), "model.onnx".to_string())
|
||||
} else {
|
||||
crate::gliner_models::resolve_model(&variant, &language)
|
||||
.unwrap_or_else(|| (select_model_dir(), "model.onnx".to_string()))
|
||||
};
|
||||
|
||||
let model_path = model_dir.join("onnx").join(&onnx_file);
|
||||
let tokenizer_path = model_dir.join("tokenizer.json");
|
||||
|
||||
info!("Loading GLiNER model from: {}, variant {}", model_dir.display(), variant);
|
||||
|
||||
let session = ort::session::Session::builder()
|
||||
.map_err(|e| format!("Failed to create ort session builder: {}", e))?
|
||||
.commit_from_file(&model_path)
|
||||
.map_err(|e| format!("Failed to load ONNX model: {}", e))?;
|
||||
|
||||
let tokenizer = Tokenizer::from_file(&tokenizer_path)
|
||||
.map_err(|e| format!("Failed to load tokenizer: {}", e))?;
|
||||
|
||||
let splitter = Regex::new(WORD_REGEX)
|
||||
.map_err(|e| format!("Failed to compile word regex: {}", e))?;
|
||||
|
||||
MODEL.set(Mutex::new(GlinerModel { session, tokenizer, splitter }))
|
||||
.map_err(|_| "GLiNER model already initialized".to_string())?;
|
||||
|
||||
info!("GLiNER model loaded");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn select_model_dir() -> PathBuf {
|
||||
let base = APP_DIR.join("resources").join("models");
|
||||
|
||||
match i18n::get_language().as_str() {
|
||||
"en" => {
|
||||
let path = base.join("gliner_small-v2.1");
|
||||
if path.exists() { return path; }
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// multilingual (covers RU, UA, EN)
|
||||
let multi = base.join("gliner_multi-v2.1");
|
||||
if multi.exists() { return multi; }
|
||||
|
||||
// fallback
|
||||
base.join("gliner_small-v2.1")
|
||||
}
|
||||
|
||||
// WORD SPLITTING
|
||||
|
||||
/// One word produced by `split_words`, with its position in the source text.
struct WordToken {
    /// Start offset of the match in the source text (from the regex match).
    start: usize,
    /// End offset of the match in the source text (from the regex match).
    end: usize,
    /// The matched text.
    text: String,
}
|
||||
|
||||
fn split_words(splitter: &Regex, text: &str, limit: Option<usize>) -> Vec<WordToken> {
|
||||
let mut tokens = Vec::new();
|
||||
for m in splitter.find_iter(text) {
|
||||
tokens.push(WordToken {
|
||||
start: m.start(),
|
||||
end: m.end(),
|
||||
text: m.as_str().to_string(),
|
||||
});
|
||||
if let Some(lim) = limit {
|
||||
if tokens.len() >= lim { break; }
|
||||
}
|
||||
}
|
||||
tokens
|
||||
}
|
||||
|
||||
// PROMPT CONSTRUCTION
|
||||
//
|
||||
// GLiNER prompt format:
|
||||
// [<<ENT>>, label1, <<ENT>>, label2, ..., <<SEP>>, word1, word2, ..., wordN]  (each label is pushed as one whole string)
|
||||
|
||||
fn build_prompt(entities: &[&str], words: &[WordToken]) -> (Vec<String>, usize) {
|
||||
let mut prompt = Vec::with_capacity(entities.len() * 2 + 1 + words.len());
|
||||
|
||||
for entity in entities {
|
||||
prompt.push("<<ENT>>".to_string());
|
||||
prompt.push(entity.to_string()); // whole string, no split
|
||||
}
|
||||
prompt.push("<<SEP>>".to_string());
|
||||
|
||||
let entities_len = prompt.len();
|
||||
|
||||
for w in words {
|
||||
prompt.push(w.text.clone());
|
||||
}
|
||||
|
||||
(prompt, entities_len)
|
||||
}
|
||||
|
||||
// ENCODING
|
||||
|
||||
struct EncodedBatch {
|
||||
input_ids: ndarray::Array2<i64>,
|
||||
attention_masks: ndarray::Array2<i64>,
|
||||
word_masks: ndarray::Array2<i64>,
|
||||
text_lengths: ndarray::Array2<i64>,
|
||||
num_words: usize,
|
||||
}
|
||||
|
||||
fn encode_single(
|
||||
tokenizer: &Tokenizer,
|
||||
_text: &str,
|
||||
entities: &[&str],
|
||||
words: &[WordToken],
|
||||
) -> Result<EncodedBatch, String> {
|
||||
let (prompt, ent_len) = build_prompt(entities, words);
|
||||
let text_word_count = words.len();
|
||||
|
||||
let mut word_encodings: Vec<Vec<u32>> = Vec::with_capacity(prompt.len());
|
||||
let mut total_tokens: usize = 2; // BOS + EOS
|
||||
let mut entity_tokens: usize = 0;
|
||||
|
||||
for (pos, word) in prompt.iter().enumerate() {
|
||||
let encoding = tokenizer.encode(word.as_str(), false)
|
||||
.map_err(|e| format!("Tokenizer encode error: {}", e))?;
|
||||
let ids = encoding.get_ids().to_vec();
|
||||
total_tokens += ids.len();
|
||||
if pos < ent_len {
|
||||
entity_tokens += ids.len();
|
||||
}
|
||||
word_encodings.push(ids);
|
||||
}
|
||||
|
||||
// text_offset: index where text tokens start (after BOS + entity tokens)
|
||||
let text_offset = entity_tokens + 1;
|
||||
|
||||
// DEBUG
|
||||
debug!("GLiNER prompt ({} total, ent_len={}, text_offset={}):", prompt.len(), ent_len, text_offset);
|
||||
for (i, (word, enc)) in prompt.iter().zip(word_encodings.iter()).enumerate() {
|
||||
debug!(" [{}]{} '{}' -> {:?}", i, if i < ent_len { " ENT" } else { " TXT" }, word, enc);
|
||||
}
|
||||
|
||||
let mut input_ids = Array::zeros((1, total_tokens));
|
||||
let mut attention_masks = Array::zeros((1, total_tokens));
|
||||
let mut word_masks = Array::zeros((1, total_tokens));
|
||||
|
||||
let mut idx: usize = 0;
|
||||
let mut word_id: i64 = 0;
|
||||
|
||||
// BOS
|
||||
input_ids[[0, idx]] = 1;
|
||||
attention_masks[[0, idx]] = 1;
|
||||
idx += 1;
|
||||
|
||||
// encode each word - matching gline-rs idx-based logic exactly
|
||||
for word_enc in word_encodings.iter() {
|
||||
for (token_idx, &token_id) in word_enc.iter().enumerate() {
|
||||
input_ids[[0, idx]] = token_id as i64;
|
||||
attention_masks[[0, idx]] = 1;
|
||||
// word mask: only for text tokens (past text_offset), first sub-token only
|
||||
if idx >= text_offset && token_idx == 0 {
|
||||
word_masks[[0, idx]] = word_id;
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
// increment word_id for any word whose tokens end past text_offset
|
||||
if idx >= text_offset {
|
||||
word_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// EOS
|
||||
input_ids[[0, idx]] = 2;
|
||||
attention_masks[[0, idx]] = 1;
|
||||
|
||||
let mut text_lengths = Array::zeros((1, 1));
|
||||
text_lengths[[0, 0]] = (text_word_count + 1) as i64;
|
||||
|
||||
debug!("GLiNER input_ids: {:?}", input_ids.as_slice().unwrap());
|
||||
debug!("GLiNER word_masks: {:?}", word_masks.as_slice().unwrap());
|
||||
debug!("GLiNER text_lengths: {}", text_word_count);
|
||||
|
||||
Ok(EncodedBatch {
|
||||
input_ids,
|
||||
attention_masks,
|
||||
word_masks,
|
||||
text_lengths,
|
||||
num_words: text_word_count + 1,
|
||||
})
|
||||
}
|
||||
|
||||
// SPAN TENSORS
|
||||
|
||||
fn make_span_tensors(num_words: usize, max_width: usize) -> (ndarray::Array3<i64>, ndarray::Array2<bool>) {
|
||||
let num_spans = num_words * max_width;
|
||||
|
||||
let mut span_idx = Array::zeros((1, num_spans, 2));
|
||||
let mut span_mask = Array::from_elem((1, num_spans), false);
|
||||
|
||||
for start in 0..num_words {
|
||||
let remaining = num_words - start;
|
||||
let actual_max = max_width.min(remaining);
|
||||
for width in 0..actual_max {
|
||||
let dim = start * max_width + width;
|
||||
span_idx[[0, dim, 0]] = start as i64;
|
||||
span_idx[[0, dim, 1]] = (start + width) as i64;
|
||||
span_mask[[0, dim]] = true;
|
||||
}
|
||||
}
|
||||
|
||||
(span_idx, span_mask)
|
||||
}
|
||||
|
||||
// DECODE + GREEDY SEARCH
|
||||
|
||||
/// Logistic function: converts a raw logit into a score between 0 and 1.
fn sigmoid(x: f32) -> f32 {
    let decay = (-x).exp();
    (1.0 + decay).recip()
}
|
||||
|
||||
/// A labelled span of the input text produced by span decoding.
#[derive(Debug, Clone, PartialEq)]
struct Entity {
    /// Slice of the input text covered by the span.
    text: String,
    /// Entity label the span was scored against.
    label: String,
    /// Sigmoid of the raw logit for this span/label pair.
    prob: f32,
    /// Byte offset into the input text where the span begins.
    start: usize,
    /// Byte offset into the input text where the span ends (exclusive).
    end: usize,
}
|
||||
|
||||
fn decode_and_search(
|
||||
logits_data: &[f32],
|
||||
logits_shape: &[usize],
|
||||
words: &[WordToken],
|
||||
text: &str,
|
||||
entities: &[&str],
|
||||
max_width: usize,
|
||||
threshold: f32,
|
||||
) -> Vec<Entity> {
|
||||
let num_tokens = words.len();
|
||||
|
||||
let dim_mw = logits_shape.get(2).copied().unwrap_or(0);
|
||||
let dim_e = logits_shape.get(3).copied().unwrap_or(0);
|
||||
|
||||
let mut spans: Vec<Entity> = Vec::new();
|
||||
|
||||
for start in 1..=num_tokens {
|
||||
let max_end = (start + max_width).min(num_tokens + 1);
|
||||
for end in start..max_end {
|
||||
let width = end - start;
|
||||
for (class_idx, &entity_label) in entities.iter().enumerate() {
|
||||
let flat_idx = start * dim_mw * dim_e + width * dim_e + class_idx;
|
||||
if flat_idx >= logits_data.len() { continue; }
|
||||
|
||||
let raw_score = logits_data[flat_idx];
|
||||
let prob = sigmoid(raw_score);
|
||||
if prob >= threshold {
|
||||
let w_start = start - 1;
|
||||
let w_end = end - 1;
|
||||
let start_offset = words[w_start].start;
|
||||
let end_offset = words[w_end].end;
|
||||
let span_text = text[start_offset..end_offset].to_string();
|
||||
spans.push(Entity {
|
||||
text: span_text,
|
||||
label: entity_label.to_string(),
|
||||
prob,
|
||||
start: start_offset,
|
||||
end: end_offset,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spans.sort_unstable_by(|a, b| (a.start, a.end).cmp(&(b.start, b.end)));
|
||||
greedy_flat(&spans)
|
||||
}
|
||||
|
||||
fn greedy_flat(spans: &[Entity]) -> Vec<Entity> {
|
||||
if spans.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut result: Vec<Entity> = Vec::new();
|
||||
let mut prev = 0usize;
|
||||
let mut next = 1usize;
|
||||
|
||||
while next < spans.len() {
|
||||
let p = &spans[prev];
|
||||
let n = &spans[next];
|
||||
|
||||
if n.start >= p.end || p.start >= n.end {
|
||||
result.push(Entity {
|
||||
text: p.text.clone(),
|
||||
label: p.label.clone(),
|
||||
prob: p.prob,
|
||||
start: p.start,
|
||||
end: p.end,
|
||||
});
|
||||
prev = next;
|
||||
} else if p.prob < n.prob {
|
||||
prev = next;
|
||||
}
|
||||
next += 1;
|
||||
}
|
||||
|
||||
let last = &spans[prev];
|
||||
result.push(Entity {
|
||||
text: last.text.clone(),
|
||||
label: last.label.clone(),
|
||||
prob: last.prob,
|
||||
start: last.start,
|
||||
end: last.end,
|
||||
});
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// PUBLIC API
|
||||
|
||||
pub fn extract(
|
||||
text: &str,
|
||||
slots: &HashMap<String, SlotDefinition>,
|
||||
) -> Result<HashMap<String, SlotValue>, String> {
|
||||
let mut model = MODEL.get().ok_or("GLiNER not initialized")?.lock();
|
||||
|
||||
let mut label_to_slots: HashMap<&str, Vec<&str>> = HashMap::new();
|
||||
for (slot_name, def) in slots {
|
||||
if !def.entity.is_empty() {
|
||||
label_to_slots
|
||||
.entry(def.entity.as_str())
|
||||
.or_default()
|
||||
.push(slot_name.as_str());
|
||||
}
|
||||
}
|
||||
|
||||
if label_to_slots.is_empty() {
|
||||
return Ok(HashMap::new());
|
||||
}
|
||||
|
||||
let labels: Vec<&str> = label_to_slots.keys().copied().collect();
|
||||
|
||||
debug!("GLiNER extract: text='{}', labels={:?}", text, labels);
|
||||
|
||||
let words = split_words(&model.splitter, text, Some(MAX_LENGTH));
|
||||
if words.is_empty() {
|
||||
return Ok(HashMap::new());
|
||||
}
|
||||
|
||||
let encoded = encode_single(&model.tokenizer, text, &labels, &words)?;
|
||||
|
||||
let (span_idx, span_mask) = make_span_tensors(encoded.num_words, MAX_WIDTH);
|
||||
|
||||
let t_input_ids = Tensor::from_array(encoded.input_ids).map_err(|e| format!("tensor: {}", e))?;
|
||||
let t_attn = Tensor::from_array(encoded.attention_masks).map_err(|e| format!("tensor: {}", e))?;
|
||||
let t_words = Tensor::from_array(encoded.word_masks).map_err(|e| format!("tensor: {}", e))?;
|
||||
let t_lengths = Tensor::from_array(encoded.text_lengths).map_err(|e| format!("tensor: {}", e))?;
|
||||
let t_span_idx = Tensor::from_array(span_idx).map_err(|e| format!("tensor: {}", e))?;
|
||||
let t_span_mask = Tensor::from_array(span_mask).map_err(|e| format!("tensor: {}", e))?;
|
||||
|
||||
let outputs = model.session.run(
|
||||
ort::inputs! {
|
||||
"input_ids" => t_input_ids,
|
||||
"attention_mask" => t_attn,
|
||||
"words_mask" => t_words,
|
||||
"text_lengths" => t_lengths,
|
||||
"span_idx" => t_span_idx,
|
||||
"span_mask" => t_span_mask,
|
||||
}
|
||||
).map_err(|e| format!("ort inference error: {}", e))?;
|
||||
|
||||
let (shape, logits_data) = outputs["logits"]
|
||||
.try_extract_tensor::<f32>()
|
||||
.map_err(|e| format!("Failed to extract logits: {}", e))?;
|
||||
|
||||
let logits_shape: Vec<usize> = shape.iter().map(|&d| d as usize).collect();
|
||||
|
||||
debug!("GLiNER logits shape: {:?}, data len: {}", logits_shape, logits_data.len());
|
||||
let max_logit = logits_data.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
|
||||
debug!("GLiNER max logit: {:.4}, sigmoid: {:.4}", max_logit, sigmoid(max_logit));
|
||||
|
||||
// dump all scores above 5%
|
||||
let num_words = logits_shape.get(1).copied().unwrap_or(0);
|
||||
let dim_mw = logits_shape.get(2).copied().unwrap_or(0);
|
||||
let dim_e = logits_shape.get(3).copied().unwrap_or(0);
|
||||
for start in 0..num_words {
|
||||
for width in 0..dim_mw.min(num_words - start) {
|
||||
for class_idx in 0..dim_e {
|
||||
let flat_idx = start * dim_mw * dim_e + width * dim_e + class_idx;
|
||||
if flat_idx < logits_data.len() {
|
||||
let score = logits_data[flat_idx];
|
||||
let prob = sigmoid(score);
|
||||
if prob > 0.05 {
|
||||
let end = start + width;
|
||||
let w_start = if start < words.len() { &words[start].text } else { "?" };
|
||||
let w_end = if end < words.len() { &words[end].text } else { "?" };
|
||||
debug!(" span[{}..{}] '{}'->'{}' label={} score={:.3} prob={:.3}",
|
||||
start, end, w_start, w_end, labels.get(class_idx).unwrap_or(&"?"), score, prob);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let entities = decode_and_search(
|
||||
logits_data, &logits_shape, &words, text, &labels, MAX_WIDTH, THRESHOLD,
|
||||
);
|
||||
|
||||
let mut result = HashMap::new();
|
||||
|
||||
for entity in &entities {
|
||||
if entity.prob < MIN_CONFIDENCE {
|
||||
continue;
|
||||
}
|
||||
|
||||
debug!("GLiNER entity: '{}' -> '{}' ({:.1}%)",
|
||||
entity.text, entity.label, entity.prob * 100.0);
|
||||
|
||||
if let Some(slot_names) = label_to_slots.get(entity.label.as_str()) {
|
||||
for &slot_name in slot_names {
|
||||
if !result.contains_key(slot_name) {
|
||||
let value = parse_slot_value(&entity.text);
|
||||
result.insert(slot_name.to_string(), value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn parse_slot_value(text: &str) -> SlotValue {
|
||||
if let Ok(n) = text.parse::<f64>() {
|
||||
return SlotValue::Number(n);
|
||||
}
|
||||
SlotValue::Text(text.to_string())
|
||||
}
|
||||
7
crates/jarvis-core/src/slots/gliner/structs.rs
Normal file
7
crates/jarvis-core/src/slots/gliner/structs.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
/// Description of one selectable GLiNER model variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GlinerModelInfo {
    /// NOTE(review): presumably the directory that contains the model files — confirm
    /// against the code that scans for variants.
    pub model_dir: String,
    /// NOTE(review): presumably the model file name inside `model_dir` — confirm.
    pub file_name: String,
    /// Human-readable label shown for this variant in the settings model selector.
    pub display_name: String,
    /// Identifier persisted as the selected GLiNER model setting.
    pub value: String,
}
|
||||
@@ -163,10 +163,26 @@ fn get_configured_model_path() -> Result<std::path::PathBuf, String> {
|
||||
}
|
||||
}
|
||||
|
||||
// auto-detect: use first available model
|
||||
// auto-detect: prefer model matching current language
|
||||
let available = vosk_models::scan_vosk_models();
|
||||
let language = i18n::get_language();
|
||||
|
||||
// try language match first
|
||||
let lang_code = match language.as_str() {
|
||||
"ru" => "ru",
|
||||
"en" => "us", // vosk uses "us" not "en"
|
||||
"ua" => "uk", // vosk uses "uk" not "ua"
|
||||
other => other,
|
||||
};
|
||||
|
||||
if let Some(matched) = available.iter().find(|m| m.language == lang_code) {
|
||||
info!("Auto-detected Vosk model for '{}': {}", language, matched.name);
|
||||
return Ok(matched.path.clone());
|
||||
}
|
||||
|
||||
// fallback to first available
|
||||
if let Some(first) = available.first() {
|
||||
info!("Auto-detected Vosk model: {}", first.name);
|
||||
info!("Auto-detected Vosk model (no language match): {}", first.name);
|
||||
return Ok(first.path.clone());
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ pub fn scan_vosk_models() -> Vec<VoskModelInfo> {
|
||||
};
|
||||
let mut models = Vec::new();
|
||||
|
||||
info!("TESTTTTTTTTTTTTT: {}", models_dir.display());
|
||||
info!("VOSK MODELS DIR: {}", models_dir.display());
|
||||
|
||||
if !models_dir.exists() {
|
||||
warn!("Vosk models directory not found: {}", models_dir.display());
|
||||
|
||||
@@ -85,6 +85,9 @@ fn main() {
|
||||
// vosk
|
||||
tauri_commands::list_vosk_models,
|
||||
|
||||
// gliner
|
||||
tauri_commands::list_gliner_models,
|
||||
|
||||
// i18n
|
||||
tauri_commands::get_translations,
|
||||
tauri_commands::translate,
|
||||
|
||||
@@ -10,6 +10,8 @@ pub fn db_read(state: tauri::State<'_, AppState>, key: &str) -> String {
|
||||
"assistant_voice" => settings.voice.clone(),
|
||||
"selected_wake_word_engine" => format!("{:?}", settings.wake_word_engine),
|
||||
"selected_intent_recognition_engine" => format!("{:?}", settings.intent_recognition_engine),
|
||||
"selected_slot_extraction_engine" => format!("{:?}", settings.slot_extraction_engine),
|
||||
"selected_gliner_model" => settings.gliner_model.clone(),
|
||||
"selected_vosk_model" => settings.vosk_model.clone(),
|
||||
"speech_to_text_engine" => format!("{:?}", settings.speech_to_text_engine),
|
||||
"noise_suppression" => format!("{:?}", settings.noise_suppression),
|
||||
@@ -54,6 +56,16 @@ pub fn db_write(state: tauri::State<'_, AppState>, key: &str, val: &str) -> bool
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
"selected_slot_extraction_engine" => {
|
||||
match val.to_lowercase().as_str() {
|
||||
"none" => settings.slot_extraction_engine = jarvis_core::config::structs::SlotExtractionEngine::None,
|
||||
"gliner" => settings.slot_extraction_engine = jarvis_core::config::structs::SlotExtractionEngine::GLiNER,
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
"selected_gliner_model" => {
|
||||
settings.gliner_model = val.to_string();
|
||||
}
|
||||
"selected_vosk_model" => {
|
||||
settings.vosk_model = val.to_string();
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use jarvis_core::{vosk_models, DB};
|
||||
use jarvis_core::{vosk_models, gliner_models};
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -8,6 +8,12 @@ pub struct VoskModel {
|
||||
pub size: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct GlinerVariant {
|
||||
pub display_name: String,
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub fn list_vosk_models() -> Vec<VoskModel> {
|
||||
vosk_models::scan_vosk_models()
|
||||
@@ -18,4 +24,15 @@ pub fn list_vosk_models() -> Vec<VoskModel> {
|
||||
size: m.size,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub fn list_gliner_models() -> Vec<GlinerVariant> {
|
||||
gliner_models::scan_gliner_variants()
|
||||
.into_iter()
|
||||
.map(|m| GlinerVariant {
|
||||
display_name: m.display_name,
|
||||
value: m.value,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
|
||||
let availableMicrophones: MicrophoneOption[] = []
|
||||
let availableVoskModels: { label: string; value: string }[] = []
|
||||
let availableGlinerModels: { label: string; value: string }[] = []
|
||||
let settingsSaved = false
|
||||
let saveButtonDisabled = false
|
||||
|
||||
@@ -75,6 +76,8 @@
|
||||
let selectedMicrophone = ""
|
||||
let selectedWakeWordEngine = ""
|
||||
let selectedIntentRecognitionEngine = ""
|
||||
let selectedSlotExtractionEngine = ""
|
||||
let selectedGlinerModel = ""
|
||||
let selectedVoskModel = ""
|
||||
let selectedNoiseSuppression = ""
|
||||
let selectedVad = ""
|
||||
@@ -105,6 +108,8 @@
|
||||
invoke("db_write", { key: "selected_microphone", val: selectedMicrophone }),
|
||||
invoke("db_write", { key: "selected_wake_word_engine", val: selectedWakeWordEngine }),
|
||||
invoke("db_write", { key: "selected_intent_recognition_engine", val: selectedIntentRecognitionEngine }),
|
||||
invoke("db_write", { key: "selected_slot_extraction_engine", val: selectedSlotExtractionEngine }),
|
||||
invoke("db_write", { key: "selected_gliner_model", val: selectedGlinerModel }),
|
||||
invoke("db_write", { key: "selected_vosk_model", val: selectedVoskModel }),
|
||||
|
||||
invoke("db_write", { key: "noise_suppression", val: selectedNoiseSuppression }),
|
||||
@@ -173,13 +178,22 @@
|
||||
value: m.name
|
||||
}))
|
||||
|
||||
// load gliner models
|
||||
const glinerModels = await invoke<{ display_name: string; value: string }[]>("list_gliner_models")
|
||||
availableGlinerModels = glinerModels.map(m => ({
|
||||
label: m.display_name,
|
||||
value: m.value,
|
||||
}))
|
||||
|
||||
// load settings from db
|
||||
const [mic, wakeWord, intentReco, voskModel,
|
||||
const [mic, wakeWord, intentReco, slotEngine, glinerModel, voskModel,
|
||||
noiseSuppression, vad, gainNormalizer,
|
||||
pico, openai] = await Promise.all([
|
||||
invoke<string>("db_read", { key: "selected_microphone" }),
|
||||
invoke<string>("db_read", { key: "selected_wake_word_engine" }),
|
||||
invoke<string>("db_read", { key: "selected_intent_recognition_engine" }),
|
||||
invoke<string>("db_read", { key: "selected_slot_extraction_engine" }),
|
||||
invoke<string>("db_read", { key: "selected_gliner_model" }),
|
||||
invoke<string>("db_read", { key: "selected_vosk_model" }),
|
||||
|
||||
invoke<string>("db_read", { key: "noise_suppression" }),
|
||||
@@ -193,7 +207,9 @@
|
||||
selectedMicrophone = mic
|
||||
selectedWakeWordEngine = wakeWord
|
||||
selectedIntentRecognitionEngine = intentReco
|
||||
selectedSlotExtractionEngine = slotEngine
|
||||
selectedVoskModel = voskModel
|
||||
selectedGlinerModel = glinerModel
|
||||
selectedNoiseSuppression = noiseSuppression
|
||||
selectedVad = vad
|
||||
gainNormalizerEnabled = gainNormalizer === "true"
|
||||
@@ -368,7 +384,43 @@
|
||||
/>
|
||||
|
||||
<Space h="xl" />
|
||||
<NativeSelect
|
||||
data={[
|
||||
{ label: t('settings-disabled'), value: "None" },
|
||||
{ label: "GLiNER (NER)", value: "GLiNER" }
|
||||
]}
|
||||
label={t('settings-slot-engine')}
|
||||
description={t('settings-slot-engine-desc')}
|
||||
variant="filled"
|
||||
bind:value={selectedSlotExtractionEngine}
|
||||
/>
|
||||
|
||||
{#if selectedSlotExtractionEngine === "GLiNER"}
|
||||
<Space h="sm" />
|
||||
{#key availableGlinerModels}
|
||||
<NativeSelect
|
||||
data={[
|
||||
{ label: t('settings-auto-detect'), value: "" },
|
||||
...availableGlinerModels
|
||||
]}
|
||||
label={t('settings-gliner-model')}
|
||||
description={t('settings-gliner-model-desc')}
|
||||
variant="filled"
|
||||
bind:value={selectedGlinerModel}
|
||||
/>
|
||||
{/key}
|
||||
|
||||
{#if availableGlinerModels.length === 0}
|
||||
<Space h="sm" />
|
||||
<Alert title={t('settings-models-not-found')} color="orange" variant="outline">
|
||||
<Text size="sm" color="gray">
|
||||
{t('settings-gliner-models-hint')}
|
||||
</Text>
|
||||
</Alert>
|
||||
{/if}
|
||||
{/if}
|
||||
|
||||
<Space h="xl" />
|
||||
<NativeSelect
|
||||
data={[
|
||||
{ label: t('settings-disabled'), value: "None" },
|
||||
|
||||
118
post_build.py
118
post_build.py
@@ -7,6 +7,7 @@ import os
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import sys
|
||||
import filecmp
|
||||
|
||||
# some config vars
|
||||
# format: (source, destination_name)
|
||||
@@ -37,8 +38,87 @@ TARGET_DIRS = (
|
||||
|
||||
ABS_PATH = os.getcwd() + "/"
|
||||
|
||||
# check for force flag
|
||||
force_overwrite = "-force" in sys.argv
|
||||
# flags
|
||||
force_overwrite = "--force" in sys.argv
|
||||
sync_mode = "--sync" in sys.argv
|
||||
|
||||
if sync_mode:
|
||||
print("[*] Sync mode: will update changed files and remove orphans")
|
||||
|
||||
|
||||
def files_differ(src, dst):
    """Report whether `dst` needs to be refreshed from `src`.

    Returns True when `dst` is not an existing regular file, when the two
    files have different sizes, or when `src` was modified more recently
    than `dst`. File contents are not compared.
    """
    if not os.path.isfile(dst):
        return True

    src_info = os.stat(src)
    dst_info = os.stat(dst)

    # cheap metadata checks only: size mismatch, or source newer than destination
    return src_info.st_size != dst_info.st_size or src_info.st_mtime > dst_info.st_mtime
|
||||
|
||||
|
||||
def sync_directory(src_dir, dst_dir):
    """Make `dst_dir` mirror `src_dir`.

    Files missing from the destination are copied, files reported as changed
    by `files_differ` are overwritten, and files or directories that exist
    only in the destination are deleted.

    Returns a `(copied, updated, removed)` tuple of counters.
    """
    copied = updated = removed = 0

    # pass 1: bring new and changed files over from the source tree
    for cur, _subdirs, names in os.walk(src_dir):
        rel = os.path.relpath(cur, src_dir)
        mirror = dst_dir if rel == "." else os.path.join(dst_dir, rel)
        os.makedirs(mirror, exist_ok=True)

        for name in names:
            origin = os.path.join(cur, name)
            target = os.path.join(mirror, name)
            is_new = not os.path.exists(target)
            if is_new or files_differ(origin, target):
                shutil.copy2(origin, target)
                if is_new:
                    copied += 1
                else:
                    updated += 1

    # pass 2: bottom-up over the destination, dropping anything the source lacks
    for cur, subdirs, names in os.walk(dst_dir, topdown=False):
        rel = os.path.relpath(cur, dst_dir)
        counterpart = src_dir if rel == "." else os.path.join(src_dir, rel)

        for name in names:
            stale = os.path.join(cur, name)
            if os.path.exists(os.path.join(counterpart, name)):
                continue
            os.remove(stale)
            print(f" [-] Removed orphan file: {os.path.relpath(stale, dst_dir)}")
            removed += 1

        for sub in subdirs:
            stale_dir = os.path.join(cur, sub)
            if os.path.exists(os.path.join(counterpart, sub)):
                continue
            shutil.rmtree(stale_dir)
            print(f" [-] Removed orphan dir: {os.path.relpath(stale_dir, dst_dir)}")
            removed += 1

    return copied, updated, removed
|
||||
|
||||
|
||||
def sync_file(src_path, dst_path):
    """Bring a single destination file in line with its source.

    Returns `(copied, updated)`: `(1, 0)` when the destination did not exist
    and was created, `(0, 1)` when it existed and `files_differ` reported a
    change, `(0, 0)` when nothing had to be done.
    """
    if os.path.exists(dst_path):
        if not files_differ(src_path, dst_path):
            return 0, 0
        shutil.copy2(src_path, dst_path)
        return 0, 1

    # first copy: the parent directory may not exist yet
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)
    shutil.copy2(src_path, dst_path)
    return 1, 0
|
||||
|
||||
|
||||
for tdir in TARGET_DIRS:
|
||||
tdir = ABS_PATH + tdir
|
||||
@@ -47,7 +127,6 @@ for tdir in TARGET_DIRS:
|
||||
print("Skipping target, not a directory: ", tdir)
|
||||
continue
|
||||
|
||||
# copy lib files
|
||||
for entry in SOURCE:
|
||||
if isinstance(entry, tuple):
|
||||
src, dest_name = entry
|
||||
@@ -57,11 +136,22 @@ for tdir in TARGET_DIRS:
|
||||
src_path = ABS_PATH + src
|
||||
|
||||
if os.path.isdir(src_path):
|
||||
# copy the whole directory
|
||||
target_name = dest_name if dest_name else os.path.basename(src.rstrip('/'))
|
||||
full_target_dir_path = os.path.join(tdir, target_name)
|
||||
|
||||
if os.path.isdir(full_target_dir_path):
|
||||
|
||||
if sync_mode:
|
||||
# sync: update changed, add new, remove orphans
|
||||
if os.path.isdir(full_target_dir_path):
|
||||
c, u, r = sync_directory(src_path, full_target_dir_path)
|
||||
if c or u or r:
|
||||
print(f"[~] Synced: {src} -> {target_name} (+{c} new, ~{u} updated, -{r} removed)")
|
||||
else:
|
||||
print(f"[=] Up to date: {src} -> {target_name}")
|
||||
else:
|
||||
shutil.copytree(src_path, full_target_dir_path)
|
||||
print("[+] Directory copied: ", src, "->", target_name)
|
||||
|
||||
elif os.path.isdir(full_target_dir_path):
|
||||
if force_overwrite:
|
||||
shutil.rmtree(full_target_dir_path)
|
||||
shutil.copytree(src_path, full_target_dir_path)
|
||||
@@ -73,11 +163,19 @@ for tdir in TARGET_DIRS:
|
||||
print("[+] Directory copied: ", src, "->", target_name)
|
||||
|
||||
elif os.path.isfile(src_path):
|
||||
# copy file
|
||||
target_name = dest_name if dest_name else os.path.basename(src)
|
||||
full_target_file_path = os.path.join(tdir, target_name)
|
||||
|
||||
if os.path.isfile(full_target_file_path):
|
||||
|
||||
if sync_mode:
|
||||
c, u = sync_file(src_path, full_target_file_path)
|
||||
if c:
|
||||
print("[+] File copied: ", src, "->", target_name)
|
||||
elif u:
|
||||
print("[~] File updated: ", src, "->", target_name)
|
||||
else:
|
||||
print("[=] Up to date: ", src, "->", target_name)
|
||||
|
||||
elif os.path.isfile(full_target_file_path):
|
||||
if force_overwrite:
|
||||
os.remove(full_target_file_path)
|
||||
shutil.copy(src_path, full_target_file_path)
|
||||
@@ -90,4 +188,4 @@ for tdir in TARGET_DIRS:
|
||||
else:
|
||||
print("[?] Unknown entity to copy: ", src)
|
||||
|
||||
print("Post compile build done.")
|
||||
print("Post compile build done.")
|
||||
@@ -1,14 +0,0 @@
|
||||
[[commands]]
|
||||
id = "hello"
|
||||
type = "lua"
|
||||
script = "script.lua"
|
||||
sandbox = "minimal"
|
||||
timeout = 5000
|
||||
phrases.ru = [
|
||||
"привет",
|
||||
"здравствуй",
|
||||
]
|
||||
phrases.en = [
|
||||
"hello",
|
||||
"hi",
|
||||
]
|
||||
@@ -1,21 +0,0 @@
|
||||
-- simple test hello command
|
||||
|
||||
local lang = jarvis.context.language
|
||||
local hour = tonumber(jarvis.context.time.hour)
|
||||
|
||||
-- determine greeting based on time
|
||||
local greeting
|
||||
if hour >= 5 and hour < 12 then
|
||||
greeting = lang == "ru" and "Доброе утро" or "Good morning"
|
||||
elseif hour >= 12 and hour < 17 then
|
||||
greeting = lang == "ru" and "Добрый день" or "Good afternoon"
|
||||
elseif hour >= 17 and hour < 22 then
|
||||
greeting = lang == "ru" and "Добрый вечер" or "Good evening"
|
||||
else
|
||||
greeting = lang == "ru" and "Доброй ночи" or "Good night"
|
||||
end
|
||||
|
||||
jarvis.log("info", "Greeting user: " .. greeting)
|
||||
jarvis.audio.play_reply()
|
||||
|
||||
return { chain = true }
|
||||
14
resources/commands/test_slots/command.toml
Normal file
14
resources/commands/test_slots/command.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
[[commands]]
|
||||
id = "test_greet_name"
|
||||
type = "lua"
|
||||
script = "greet.lua"
|
||||
|
||||
[commands.phrases]
|
||||
en = ["say hello to {name}", "greet {name}"]
|
||||
ru = ["поздоровайся с {name}", "привет {name}"]
|
||||
|
||||
[commands.slots.name]
|
||||
entity = "person name"
|
||||
|
||||
[commands.slots.city]
|
||||
entity = "city name"
|
||||
2
resources/commands/test_slots/greet.lua
Normal file
2
resources/commands/test_slots/greet.lua
Normal file
@@ -0,0 +1,2 @@
|
||||
-- Greets whoever was captured in the "name" slot; falls back to a placeholder
-- when the slot is empty.
local who = jarvis.context.slots.name or "[UNKNOWN]"
print("Hello, " .. who .. "!")
|
||||
@@ -6,19 +6,16 @@ timeout = 5000
|
||||
sandbox = "standard"
|
||||
|
||||
[commands.phrases]
|
||||
ru = [
|
||||
"какая погода",
|
||||
"погода"
|
||||
]
|
||||
en = [
|
||||
"what's the weather",
|
||||
"weather"
|
||||
]
|
||||
ru = ["какая погода в {city}", "погода {city}"]
|
||||
en = ["what's the weather in {city}", "weather in {city}"]
|
||||
|
||||
[commands.sounds]
|
||||
ru = ["weather_ru_1", "weather_ru_2"]
|
||||
en = ["weather_en_1", "weather_en_2"]
|
||||
|
||||
[commands.slots.city]
|
||||
entity = "city name"
|
||||
context = ["in", "for", "at", "в", "для"]
|
||||
|
||||
|
||||
[[commands]]
|
||||
|
||||
Reference in New Issue
Block a user