commit 657230c90485581947362d3c62af03bfcd53a02b Author: Abraham Date: Thu Dec 15 04:17:16 2022 +0500 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9bb619f --- /dev/null +++ b/.gitignore @@ -0,0 +1,185 @@ +# Created by .ignore support plugin (hsz.mobi) +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject +### VirtualEnv template +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +.venv +pip-selfcheck.json + +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# idea folder, uncomment if you don't need it +# .idea \ No newline at end of file diff --git a/_stt.py b/_stt.py new file mode 100644 index 0000000..efa903d --- /dev/null +++ b/_stt.py @@ -0,0 +1,59 @@ +import torch +import sounddevice as sd +import speech_recognition as sr +import time +import numpy +from glob import glob + +device = torch.device('cpu') +model, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models', + model='silero_stt', + language='en', # en, ru + device=device) +(read_batch, split_into_batches, + read_audio, prepare_model_input) = utils + +def callback(_r, audio): + try: + # CONVERT raw wav data to NumPy array + # wav_raw = audio.get_wav_data() + # data_s16 = numpy.frombuffer(wav_raw, dtype=numpy.int16, count=len(wav_raw) // 2, offset=0) + # np_audio = data_s16 * 0.5 ** 15 + + # Play it via sounddevice + #sd.play(np_audio, m.SAMPLE_RATE) + #time.sleep(len(np_audio) / m.SAMPLE_RATE) + #sd.stop() + + print("Распознание ...") + + # TODO: fix crutch, pass audio data directly as a model input of Silero STT + with open('speech.wav', 'wb') as f: + f.write(audio.get_wav_data()) + + test_files = glob('speech.wav') + batches = split_into_batches(test_files, batch_size=10) + input = prepare_model_input(read_batch(batches[0]), + device=device) + + output = model(input) + for example in output: + print(decoder(example.cpu())) + + # voice = recognizer.recognize_google(audio, language="ru-RU").lower() + # print("[log] Распознано: " + voice) + + except sr.UnknownValueError: + print("[log] Голос не распознан!") + + +# запуск +r = sr.Recognizer() +r.pause_threshold = 0.5 +m = sr.Microphone(device_index=1) + +with m as source: + r.adjust_for_ambient_noise(source) + +stop_listening = r.listen_in_background(m, callback) +while True: time.sleep(0.1) diff --git a/config.py b/config.py new file mode 100644 index 0000000..f926c06 --- /dev/null +++ b/config.py @@ -0,0 +1,14 @@ +VA_NAME = 'Кеша' + +VA_VER = "2.0" + +VA_ALIAS = ('кеша', 'кеш', 'инокентий', 'иннокентий', 'кишун', 'киш', 'кишаня', 'кешечка', 'кэш', 'кэша') + +VA_TBR = ('скажи', 'покажи', 'ответь', 'произнеси', 'расскажи', 'сколько') + +VA_CMD_LIST = { + "help": ('список команд', 'команды', 'что ты умеешь', 'твои навыки', 'навыки'), + "ctime": ('время', 'текущее время', 'сейчас времени', 'который час'), + "joke": ('расскажи анекдот', 'рассмеши', 'шутка', 'расскажи шутку', 'пошути', 'развесели'), + "open_browser": ('открой браузер', 'запусти браузер', 'открой гугл хром', 'гугл хром') +} \ No newline at end of file diff --git a/latest_silero_models.yml b/latest_silero_models.yml new file mode 100644 index 0000000..79b0281 --- /dev/null +++ b/latest_silero_models.yml @@ -0,0 +1,507 @@ +# pre-trained STT models +stt_models: + en: + latest: + meta: + name: "en_v6" + sample: "https://models.silero.ai/examples/en_sample.wav" + labels: "https://models.silero.ai/models/en/en_v1_labels.json" + jit: "https://models.silero.ai/models/en/en_v6.jit" + onnx: "https://models.silero.ai/models/en/en_v5.onnx" + jit_q: "https://models.silero.ai/models/en/en_v6_q.jit" + jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit" + onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx" + v6: + meta: + name: "en_v6" + sample: "https://models.silero.ai/examples/en_sample.wav" + labels: "https://models.silero.ai/models/en/en_v1_labels.json" + jit: "https://models.silero.ai/models/en/en_v6.jit" + onnx: "https://models.silero.ai/models/en/en_v5.onnx" + jit_q: "https://models.silero.ai/models/en/en_v6_q.jit" + jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit" + onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx" + v5: + meta: + name: "en_v5" + sample: "https://models.silero.ai/examples/en_sample.wav" + labels: "https://models.silero.ai/models/en/en_v1_labels.json" + jit: "https://models.silero.ai/models/en/en_v5.jit" + onnx: "https://models.silero.ai/models/en/en_v5.onnx" + onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx" + jit_q: "https://models.silero.ai/models/en/en_v5_q.jit" + jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit" + onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx" + v4_0: + meta: + name: "en_v4_0" + sample: "https://models.silero.ai/examples/en_sample.wav" + labels: "https://models.silero.ai/models/en/en_v1_labels.json" + jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model" + onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx" + v3: + meta: + name: "en_v3" + sample: "https://models.silero.ai/examples/en_sample.wav" + labels: "https://models.silero.ai/models/en/en_v1_labels.json" + jit: "https://models.silero.ai/models/en/en_v3_jit.model" + onnx: "https://models.silero.ai/models/en/en_v3.onnx" + jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model" + jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model" + jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model" + onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx" + jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model" + jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model" + onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx" + v2: + meta: + name: "en_v2" + sample: "https://models.silero.ai/examples/en_sample.wav" + labels: "https://models.silero.ai/models/en/en_v1_labels.json" + jit: "https://models.silero.ai/models/en/en_v2_jit.model" + onnx: "https://models.silero.ai/models/en/en_v2.onnx" + tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz" + v1: + meta: + name: "en_v1" + sample: "https://models.silero.ai/examples/en_sample.wav" + labels: "https://models.silero.ai/models/en/en_v1_labels.json" + jit: "https://models.silero.ai/models/en/en_v1_jit.model" + onnx: "https://models.silero.ai/models/en/en_v1.onnx" + tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz" + de: + latest: + meta: + name: "de_v1" + sample: "https://models.silero.ai/examples/de_sample.wav" + labels: "https://models.silero.ai/models/de/de_v1_labels.json" + jit: "https://models.silero.ai/models/de/de_v1_jit.model" + onnx: "https://models.silero.ai/models/de/de_v1.onnx" + tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz" + v1: + meta: + name: "de_v1" + sample: "https://models.silero.ai/examples/de_sample.wav" + labels: "https://models.silero.ai/models/de/de_v1_labels.json" + jit_large: "https://models.silero.ai/models/de/de_v1_jit.model" + onnx: "https://models.silero.ai/models/de/de_v1.onnx" + tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz" + v3: + meta: + name: "de_v3" + sample: "https://models.silero.ai/examples/de_sample.wav" + labels: "https://models.silero.ai/models/de/de_v1_labels.json" + jit_large: "https://models.silero.ai/models/de/de_v3_large.jit" + v4: + meta: + name: "de_v4" + sample: "https://models.silero.ai/examples/de_sample.wav" + labels: "https://models.silero.ai/models/de/de_v1_labels.json" + jit_large: "https://models.silero.ai/models/de/de_v4_large.jit" + onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx" + es: + latest: + meta: + name: "es_v1" + sample: "https://models.silero.ai/examples/es_sample.wav" + labels: "https://models.silero.ai/models/es/es_v1_labels.json" + jit: "https://models.silero.ai/models/es/es_v1_jit.model" + onnx: "https://models.silero.ai/models/es/es_v1.onnx" + tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz" + ua: + latest: + meta: + name: "ua_v3" + sample: "https://models.silero.ai/examples/ua_sample.wav" + credits: + datasets: + speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk + labels: "https://models.silero.ai/models/ua/ua_v1_labels.json" + jit: "https://models.silero.ai/models/ua/ua_v3_jit.model" + jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model" + onnx: "https://models.silero.ai/models/ua/ua_v3.onnx" + v3: + meta: + name: "ua_v3" + sample: "https://models.silero.ai/examples/ua_sample.wav" + credits: + datasets: + speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk + labels: "https://models.silero.ai/models/ua/ua_v1_labels.json" + jit: "https://models.silero.ai/models/ua/ua_v3_jit.model" + jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model" + onnx: "https://models.silero.ai/models/ua/ua_v3.onnx" + v1: + meta: + name: "ua_v1" + sample: "https://models.silero.ai/examples/ua_sample.wav" + credits: + datasets: + speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk + labels: "https://models.silero.ai/models/ua/ua_v1_labels.json" + jit: "https://models.silero.ai/models/ua/ua_v1_jit.model" + jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model" +tts_models: + ru: + ru_v3: + latest: + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt' + sample_rate: [8000, 24000, 48000] + aidar_v2: + latest: + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt' + sample_rate: [8000, 16000] + aidar_8khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit' + sample_rate: 8000 + aidar_16khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit' + sample_rate: 16000 + baya_v2: + latest: + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt' + sample_rate: [8000, 16000] + baya_8khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit' + sample_rate: 8000 + baya_16khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit' + sample_rate: 16000 + irina_v2: + latest: + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt' + sample_rate: [8000, 16000] + irina_8khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit' + sample_rate: 8000 + irina_16khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit' + sample_rate: 16000 + kseniya_v2: + latest: + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt' + sample_rate: [8000, 16000] + kseniya_8khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit' + sample_rate: 8000 + kseniya_16khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit' + sample_rate: 16000 + natasha_v2: + latest: + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt' + sample_rate: [8000, 16000] + natasha_8khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit' + sample_rate: 8000 + natasha_16khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit' + sample_rate: 16000 + ruslan_v2: + latest: + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt' + sample_rate: [8000, 16000] + ruslan_8khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit' + sample_rate: 8000 + ruslan_16khz: + latest: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–' + example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.' + jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit' + sample_rate: 16000 + en: + lj_v2: + latest: + example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?' + package: 'https://models.silero.ai/models/tts/en/v2_lj.pt' + sample_rate: [8000, 16000] + lj_8khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–' + example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?' + jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–' + example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?' + jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit' + sample_rate: 8000 + lj_16khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–' + example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?' + jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–' + example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?' + jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit' + sample_rate: 16000 + de: + thorsten_v2: + latest: + example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.' + package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt' + sample_rate: [8000, 16000] + thorsten_8khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–' + example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.' + jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–' + example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.' + jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit' + sample_rate: 8000 + thorsten_16khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–' + example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.' + jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–' + example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.' + jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit' + sample_rate: 16000 + es: + tux_v2: + latest: + example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.' + package: 'https://models.silero.ai/models/tts/es/v2_tux.pt' + sample_rate: [8000, 16000] + tux_8khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿' + example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.' + jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿' + example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.' + jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit' + sample_rate: 8000 + tux_16khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿' + example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.' + jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿' + example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.' + jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit' + sample_rate: 16000 + fr: + gilles_v2: + latest: + example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.' + package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt' + sample_rate: [8000, 16000] + gilles_8khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–' + example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.' + jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit' + sample_rate: 8000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–' + example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.' + jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit' + sample_rate: 8000 + gilles_16khz: + latest: + tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–' + example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.' + jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit' + sample_rate: 16000 + v1: + tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–' + example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.' + jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit' + sample_rate: 16000 + ba: + aigul_v2: + latest: + example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.' + package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt' + sample_rate: [8000, 16000] + language_name: 'bashkir' + xal: + erdni_v2: + latest: + example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.' + package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt' + sample_rate: [8000, 16000] + language_name: 'kalmyk' + tt: + dilyara_v2: + latest: + example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.' + package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt' + sample_rate: [8000, 16000] + language_name: 'tatar' + uz: + dilnavoz_v2: + latest: + example: 'Tanishganimdan xursandman.' + package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt' + sample_rate: [8000, 16000] + language_name: 'uzbek' + ua: + mykyta_v2: + latest: + example: 'К+отики - пухн+асті жив+отики.' + package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt' + sample_rate: [8000, 24000, 48000] + language_name: 'ukrainian' + multi: + multi_v2: + latest: + package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt' + sample_rate: [8000, 16000] + speakers: + aidar: + lang: 'ru' + example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.' + baya: + lang: 'ru' + example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.' + kseniya: + lang: 'ru' + example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.' + irina: + lang: 'ru' + example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.' + ruslan: + lang: 'ru' + example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.' + natasha: + lang: 'ru' + example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.' + thorsten: + lang: 'de' + example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.' + tux: + lang: 'es' + example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.' + gilles: + lang: 'fr' + example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.' + lj: + lang: 'en' + example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?' + dilyara: + lang: 'tt' + example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.' +te_models: + latest: + package: "https://models.silero.ai/te_models/v2_4lang_q.pt" + languages: ['en', 'de', 'ru', 'es'] + punct: '.,-!?—' + v2: + package: "https://models.silero.ai/te_models/v2_4lang_q.pt" + languages: ['en', 'de', 'ru', 'es'] + punct: '.,-!?—' diff --git a/main.py b/main.py new file mode 100644 index 0000000..fb0ae65 --- /dev/null +++ b/main.py @@ -0,0 +1,81 @@ +# КЕША 2.0 + +import config +import stt +import tts +from fuzzywuzzy import fuzz +import datetime +from num2t4ru import num2text +import webbrowser +import random + + +print(f"{config.VA_NAME} (v{config.VA_VER}) начал свою работу ...") + + +def va_respond(voice: str): + print(voice) + if voice.startswith(config.VA_ALIAS): + # обращаются к ассистенту + cmd = recognize_cmd(filter_cmd(voice)) + + if cmd['cmd'] not in config.VA_CMD_LIST.keys(): + tts.va_speak("Что?") + else: + execute_cmd(cmd['cmd']) + + +def filter_cmd(raw_voice: str): + cmd = raw_voice + + for x in config.VA_ALIAS: + cmd = cmd.replace(x, "").strip() + + for x in config.VA_TBR: + cmd = cmd.replace(x, "").strip() + + return cmd + + +def recognize_cmd(cmd: str): + rc = {'cmd': '', 'percent': 0} + for c, v in config.VA_CMD_LIST.items(): + + for x in v: + vrt = fuzz.ratio(cmd, x) + if vrt > rc['percent']: + rc['cmd'] = c + rc['percent'] = vrt + + return rc + + +def execute_cmd(cmd: str): + if cmd == 'help': + # help + text = "Я умею: ..." + text += "произносить время ..." + text += "рассказывать анекдоты ..." + text += "и открывать браузер" + tts.va_speak(text) + pass + elif cmd == 'ctime': + # current time + now = datetime.datetime.now() + text = "Сейч+ас " + num2text(now.hour) + " " + num2text(now.minute) + tts.va_speak(text) + + elif cmd == 'joke': + jokes = ['Как смеются программисты? ... ехе ехе ехе', + 'ЭсКьюЭль запрос заходит в бар, подходит к двум столам и спрашивает .. «м+ожно присоединиться?»', + 'Программист это машина для преобразования кофе в код'] + + tts.va_speak(random.choice(jokes)) + + elif cmd == 'open_browser': + chrome_path = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s' + webbrowser.get(chrome_path).open("http://python.org") + + +# начать прослушивание команд +stt.va_listen(va_respond) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6bf8259 Binary files /dev/null and b/requirements.txt differ diff --git a/speech.wav b/speech.wav new file mode 100644 index 0000000..1b5f781 Binary files /dev/null and b/speech.wav differ diff --git a/stt.py b/stt.py new file mode 100644 index 0000000..90631a0 --- /dev/null +++ b/stt.py @@ -0,0 +1,30 @@ +import vosk +import sys +import sounddevice as sd +import queue +import json + +model = vosk.Model("model_small") +samplerate = 16000 +device = 1 + +q = queue.Queue() + + +def q_callback(indata, frames, time, status): + if status: + print(status, file=sys.stderr) + q.put(bytes(indata)) + + +def va_listen(callback): + with sd.RawInputStream(samplerate=samplerate, blocksize=8000, device=device, dtype='int16', + channels=1, callback=q_callback): + + rec = vosk.KaldiRecognizer(model, samplerate) + while True: + data = q.get() + if rec.AcceptWaveform(data): + callback(json.loads(rec.Result())["text"]) + #else: + # print(rec.PartialResult()) \ No newline at end of file diff --git a/tts.py b/tts.py new file mode 100644 index 0000000..a48b47e --- /dev/null +++ b/tts.py @@ -0,0 +1,35 @@ +import torch +import sounddevice as sd +import time + +language = 'ru' +model_id = 'ru_v3' +sample_rate = 48000 # 48000 +speaker = 'aidar' # aidar, baya, kseniya, xenia, random +put_accent = True +put_yo = True +device = torch.device('cpu') # cpu или gpu +text = "Хауди Хо, друзья!!!" + +model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models', + model='silero_tts', + language=language, + speaker=model_id) +model.to(device) + + +# воспроизводим +def va_speak(what: str): + audio = model.apply_tts(text=what+"..", + speaker=speaker, + sample_rate=sample_rate, + put_accent=put_accent, + put_yo=put_yo) + + sd.play(audio, sample_rate * 1.05) + time.sleep((len(audio) / sample_rate) + 0.5) + sd.stop() + +# sd.play(audio, sample_rate) +# time.sleep(len(audio) / sample_rate) +# sd.stop() \ No newline at end of file