Initial commit

This commit is contained in:
Abraham
2022-12-15 04:17:16 +05:00
commit 657230c904
9 changed files with 911 additions and 0 deletions

185
.gitignore vendored Normal file
View File

@@ -0,0 +1,185 @@
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
# idea folder, uncomment if you don't need it
# .idea

59
_stt.py Normal file
View File

@@ -0,0 +1,59 @@
import torch
import sounddevice as sd
import speech_recognition as sr
import time
import numpy
from glob import glob
# All inference in this script is pinned to the CPU.
device = torch.device('cpu')
# Load the Silero speech-to-text model through torch.hub. The call returns the
# model itself, a decoder that is later applied to each model output, and a
# tuple of helper functions that is unpacked just below.
model, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models',
model='silero_stt',
language='en', # en, ru
device=device)
# Helper functions shipped with the model; `callback` uses all of them except
# `read_audio`.
(read_batch, split_into_batches,
read_audio, prepare_model_input) = utils
def callback(_r, audio):
    """Transcribe one captured phrase with the Silero STT model and print it.

    Registered below via ``r.listen_in_background(m, callback)``.

    Args:
        _r: first argument supplied by the caller; not used here.
        audio: captured audio object; only ``audio.get_wav_data()`` is called.
    """
    try:
        print("Распознание ...")

        # TODO: fix crutch, pass audio data directly as a model input of Silero STT
        # The phrase is round-tripped through a WAV file because the Silero
        # helpers used here take file paths.
        wav_path = 'speech.wav'
        with open(wav_path, 'wb') as f:
            f.write(audio.get_wav_data())

        # A single file always fits in the first (and only) batch.
        batches = split_into_batches([wav_path], batch_size=10)
        model_input = prepare_model_input(read_batch(batches[0]),
                                          device=device)

        output = model(model_input)
        for example in output:
            print(decoder(example.cpu()))
    except sr.UnknownValueError:
        print("[log] Голос не распознан!")
# Start-up: create the recognizer and select the microphone.
r = sr.Recognizer()
r.pause_threshold = 0.5
m = sr.Microphone(device_index=1)

# Let the recognizer calibrate against ambient noise before listening.
with m as source:
    r.adjust_for_ambient_noise(source)

# From here on every captured phrase is handed to `callback`.
stop_listening = r.listen_in_background(m, callback)

# Idle forever in short sleeps; nothing else happens on this code path.
while True:
    time.sleep(0.1)

14
config.py Normal file
View File

@@ -0,0 +1,14 @@
# Assistant name and version; main.py prints both in its start-up banner.
VA_NAME = 'Кеша'
VA_VER = "2.0"
# Names the assistant answers to: main.py only reacts to phrases that start
# with one of these, and filter_cmd strips them from the phrase.
VA_ALIAS = ('кеша', 'кеш', 'инокентий', 'иннокентий', 'кишун', 'киш', 'кишаня', 'кешечка', 'кэш', 'кэша')
# Filler words that filter_cmd also strips before command matching
# (TBR presumably stands for "to be removed").
VA_TBR = ('скажи', 'покажи', 'ответь', 'произнеси', 'расскажи', 'сколько')
# Command id -> phrases that trigger it. recognize_cmd fuzzy-matches the
# filtered phrase against every phrase here; execute_cmd dispatches on the id.
VA_CMD_LIST = {
"help": ('список команд', 'команды', 'что ты умеешь', 'твои навыки', 'навыки'),
"ctime": ('время', 'текущее время', 'сейчас времени', 'который час'),
"joke": ('расскажи анекдот', 'рассмеши', 'шутка', 'расскажи шутку', 'пошути', 'развесели'),
"open_browser": ('открой браузер', 'запусти браузер', 'открой гугл хром', 'гугл хром')
}

507
latest_silero_models.yml Normal file
View File

@@ -0,0 +1,507 @@
# pre-trained STT models
stt_models:
en:
latest:
meta:
name: "en_v6"
sample: "https://models.silero.ai/examples/en_sample.wav"
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
jit: "https://models.silero.ai/models/en/en_v6.jit"
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
v6:
meta:
name: "en_v6"
sample: "https://models.silero.ai/examples/en_sample.wav"
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
jit: "https://models.silero.ai/models/en/en_v6.jit"
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
v5:
meta:
name: "en_v5"
sample: "https://models.silero.ai/examples/en_sample.wav"
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
jit: "https://models.silero.ai/models/en/en_v5.jit"
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
v4_0:
meta:
name: "en_v4_0"
sample: "https://models.silero.ai/examples/en_sample.wav"
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
v3:
meta:
name: "en_v3"
sample: "https://models.silero.ai/examples/en_sample.wav"
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
jit: "https://models.silero.ai/models/en/en_v3_jit.model"
onnx: "https://models.silero.ai/models/en/en_v3.onnx"
jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
v2:
meta:
name: "en_v2"
sample: "https://models.silero.ai/examples/en_sample.wav"
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
jit: "https://models.silero.ai/models/en/en_v2_jit.model"
onnx: "https://models.silero.ai/models/en/en_v2.onnx"
tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
v1:
meta:
name: "en_v1"
sample: "https://models.silero.ai/examples/en_sample.wav"
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
jit: "https://models.silero.ai/models/en/en_v1_jit.model"
onnx: "https://models.silero.ai/models/en/en_v1.onnx"
tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
de:
latest:
meta:
name: "de_v1"
sample: "https://models.silero.ai/examples/de_sample.wav"
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
jit: "https://models.silero.ai/models/de/de_v1_jit.model"
onnx: "https://models.silero.ai/models/de/de_v1.onnx"
tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
v1:
meta:
name: "de_v1"
sample: "https://models.silero.ai/examples/de_sample.wav"
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
onnx: "https://models.silero.ai/models/de/de_v1.onnx"
tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
v3:
meta:
name: "de_v3"
sample: "https://models.silero.ai/examples/de_sample.wav"
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
v4:
meta:
name: "de_v4"
sample: "https://models.silero.ai/examples/de_sample.wav"
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
es:
latest:
meta:
name: "es_v1"
sample: "https://models.silero.ai/examples/es_sample.wav"
labels: "https://models.silero.ai/models/es/es_v1_labels.json"
jit: "https://models.silero.ai/models/es/es_v1_jit.model"
onnx: "https://models.silero.ai/models/es/es_v1.onnx"
tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
ua:
latest:
meta:
name: "ua_v3"
sample: "https://models.silero.ai/examples/ua_sample.wav"
credits:
datasets:
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
v3:
meta:
name: "ua_v3"
sample: "https://models.silero.ai/examples/ua_sample.wav"
credits:
datasets:
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
v1:
meta:
name: "ua_v1"
sample: "https://models.silero.ai/examples/ua_sample.wav"
credits:
datasets:
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
tts_models:
ru:
ru_v3:
latest:
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
sample_rate: [8000, 24000, 48000]
aidar_v2:
latest:
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
sample_rate: [8000, 16000]
aidar_8khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
sample_rate: 8000
aidar_16khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
sample_rate: 16000
baya_v2:
latest:
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
sample_rate: [8000, 16000]
baya_8khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
sample_rate: 8000
baya_16khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
sample_rate: 16000
irina_v2:
latest:
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
sample_rate: [8000, 16000]
irina_8khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
sample_rate: 8000
irina_16khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
sample_rate: 16000
kseniya_v2:
latest:
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
sample_rate: [8000, 16000]
kseniya_8khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
sample_rate: 8000
kseniya_16khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
sample_rate: 16000
natasha_v2:
latest:
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
sample_rate: [8000, 16000]
natasha_8khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
sample_rate: 8000
natasha_16khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
sample_rate: 16000
ruslan_v2:
latest:
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
sample_rate: [8000, 16000]
ruslan_8khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
sample_rate: 8000
ruslan_16khz:
latest:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;'
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
sample_rate: 16000
en:
lj_v2:
latest:
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
sample_rate: [8000, 16000]
lj_8khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;'
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;'
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
sample_rate: 8000
lj_16khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;'
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;'
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
sample_rate: 16000
de:
thorsten_v2:
latest:
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
sample_rate: [8000, 16000]
thorsten_8khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;'
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;'
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
sample_rate: 8000
thorsten_16khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;'
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;'
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
sample_rate: 16000
es:
tux_v2:
latest:
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
sample_rate: [8000, 16000]
tux_8khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
sample_rate: 8000
tux_16khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
sample_rate: 16000
fr:
gilles_v2:
latest:
example: 'Je suis ce que je suis, et si je suis ce que je suis, quest ce que je suis.'
package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
sample_rate: [8000, 16000]
gilles_8khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;'
example: 'Je suis ce que je suis, et si je suis ce que je suis, quest ce que je suis.'
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
sample_rate: 8000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;'
example: 'Je suis ce que je suis, et si je suis ce que je suis, quest ce que je suis.'
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
sample_rate: 8000
gilles_16khz:
latest:
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;'
example: 'Je suis ce que je suis, et si je suis ce que je suis, quest ce que je suis.'
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
sample_rate: 16000
v1:
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;'
example: 'Je suis ce que je suis, et si je suis ce que je suis, quest ce que je suis.'
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
sample_rate: 16000
ba:
aigul_v2:
latest:
example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
sample_rate: [8000, 16000]
language_name: 'bashkir'
xal:
erdni_v2:
latest:
example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
sample_rate: [8000, 16000]
language_name: 'kalmyk'
tt:
dilyara_v2:
latest:
example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
sample_rate: [8000, 16000]
language_name: 'tatar'
uz:
dilnavoz_v2:
latest:
example: 'Tanishganimdan xursandman.'
package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
sample_rate: [8000, 16000]
language_name: 'uzbek'
ua:
mykyta_v2:
latest:
example: 'К+отики - пухн+асті жив+отики.'
package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
sample_rate: [8000, 24000, 48000]
language_name: 'ukrainian'
multi:
multi_v2:
latest:
package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
sample_rate: [8000, 16000]
speakers:
aidar:
lang: 'ru'
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
baya:
lang: 'ru'
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
kseniya:
lang: 'ru'
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
irina:
lang: 'ru'
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
ruslan:
lang: 'ru'
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
natasha:
lang: 'ru'
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
thorsten:
lang: 'de'
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
tux:
lang: 'es'
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
gilles:
lang: 'fr'
example: 'Je suis ce que je suis, et si je suis ce que je suis, quest ce que je suis.'
lj:
lang: 'en'
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
dilyara:
lang: 'tt'
example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
te_models:
latest:
package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
languages: ['en', 'de', 'ru', 'es']
punct: '.,-!?—'
v2:
package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
languages: ['en', 'de', 'ru', 'es']
punct: '.,-!?—'

81
main.py Normal file
View File

@@ -0,0 +1,81 @@
# КЕША 2.0
import config
import stt
import tts
from fuzzywuzzy import fuzz
import datetime
from num2t4ru import num2text
import webbrowser
import random
# Start-up banner: prints the configured assistant name and version.
print(f"{config.VA_NAME} (v{config.VA_VER}) начал свою работу ...")
def va_respond(voice: str):
    """Handle one recognized phrase.

    The phrase is always echoed to stdout. It is acted on only when it starts
    with one of the assistant's aliases: the phrase is filtered, matched to a
    command and executed, or answered with "Что?" when no known command
    was matched.

    Args:
        voice: the recognized phrase.
    """
    print(voice)

    # Ignore anything that is not addressed to the assistant.
    if not voice.startswith(config.VA_ALIAS):
        return

    recognized = recognize_cmd(filter_cmd(voice))
    command = recognized['cmd']

    if command in config.VA_CMD_LIST:
        execute_cmd(command)
    else:
        tts.va_speak("Что?")
def filter_cmd(raw_voice: str, aliases=None, tbr=None):
    """Strip assistant names and filler words from a recognized phrase.

    Args:
        raw_voice: the recognized phrase.
        aliases: substrings to remove as assistant names; defaults to
            ``config.VA_ALIAS``.
        tbr: filler substrings to remove; defaults to ``config.VA_TBR``.

    Returns:
        The phrase with every alias and filler removed and runs of
        whitespace collapsed to single spaces.
    """
    if aliases is None:
        aliases = config.VA_ALIAS
    if tbr is None:
        tbr = config.VA_TBR

    cmd = raw_voice
    for patterns in (aliases, tbr):
        # Longest first: otherwise a short pattern that is a prefix or a part
        # of a longer one ('кэш' in 'кэша', 'скажи' in 'расскажи') is removed
        # first and leaves fragments of the longer word behind.
        for pattern in sorted(patterns, key=len, reverse=True):
            cmd = cmd.replace(pattern, "").strip()

    # Removing a word from the middle leaves a double space; normalise it.
    return " ".join(cmd.split())
def recognize_cmd(cmd: str):
    """Pick the configured command whose trigger phrase best matches `cmd`.

    Every phrase in ``config.VA_CMD_LIST`` is scored with ``fuzz.ratio``.

    Args:
        cmd: the filtered phrase.

    Returns:
        A dict ``{'cmd': <command id>, 'percent': <best score>}``; when no
        phrase scores above 0 it is ``{'cmd': '', 'percent': 0}``.
    """
    best = {'cmd': '', 'percent': 0}

    scored = (
        (command, fuzz.ratio(cmd, phrase))
        for command, phrases in config.VA_CMD_LIST.items()
        for phrase in phrases
    )
    for command, score in scored:
        # Strict comparison: on a tie the earlier phrase wins.
        if score > best['percent']:
            best = {'cmd': command, 'percent': score}

    return best
def execute_cmd(cmd: str):
    """Carry out a recognized command.

    Args:
        cmd: one of 'help', 'ctime', 'joke' or 'open_browser'. Any other
            value is ignored.
    """
    if cmd == 'help':
        # Speak the list of skills.
        text = ("Я умею: ..."
                "произносить время ..."
                "рассказывать анекдоты ..."
                "и открывать браузер")
        tts.va_speak(text)

    elif cmd == 'ctime':
        # Speak the current local time as "<hour> <minute>" in words.
        now = datetime.datetime.now()
        text = "Сейч+ас " + num2text(now.hour) + " " + num2text(now.minute)
        tts.va_speak(text)

    elif cmd == 'joke':
        jokes = ['Как смеются программисты? ... ехе ехе ехе',
                 'ЭсКьюЭль запрос заходит в бар, подходит к двум столам и спрашивает .. «м+ожно присоединиться?»',
                 'Программист это машина для преобразования кофе в код']
        tts.va_speak(random.choice(jokes))

    elif cmd == 'open_browser':
        import os  # only this branch needs it

        chrome_exe = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe'
        url = "http://python.org"
        if os.path.isfile(chrome_exe):
            webbrowser.get(chrome_exe + ' %s').open(url)
        else:
            # Chrome is not at the hard-coded Windows path: use the system
            # default browser rather than silently doing nothing.
            webbrowser.open(url)
# Start listening for commands; each recognized phrase is passed to va_respond.
stt.va_listen(va_respond)

BIN
requirements.txt Normal file

Binary file not shown.

BIN
speech.wav Normal file

Binary file not shown.

30
stt.py Normal file
View File

@@ -0,0 +1,30 @@
import vosk
import sys
import sounddevice as sd
import queue
import json
# Vosk model, loaded from the "model_small" path.
model = vosk.Model("model_small")
# Sample rate shared by the input stream and the recognizer in va_listen.
samplerate = 16000
# Value passed as `device=` to the sounddevice input stream in va_listen.
device = 1
# Raw audio blocks: filled by q_callback, drained by va_listen.
q = queue.Queue()
def q_callback(indata, frames, time, status):
    """Stream callback: report any status flag and enqueue the audio block.

    Args:
        indata: the captured audio block; copied into a ``bytes`` object.
        frames: unused.
        time: unused.
        status: printed to stderr when truthy.
    """
    if status:
        print(status, file=sys.stderr)
    chunk = bytes(indata)
    q.put(chunk)
def va_listen(callback):
    """Listen on the input stream forever and report recognized phrases.

    Opens a raw 16-bit mono input stream whose blocks arrive through
    `q_callback` and the module queue, feeds each block to a Vosk recognizer,
    and calls `callback` with the "text" field of the recognizer's result
    whenever `AcceptWaveform` returns a truthy value. The loop has no exit,
    so this function does not return.

    Args:
        callback: callable taking the recognized text as its only argument.
    """
    stream = sd.RawInputStream(samplerate=samplerate, blocksize=8000,
                               device=device, dtype='int16',
                               channels=1, callback=q_callback)
    with stream:
        recognizer = vosk.KaldiRecognizer(model, samplerate)
        while True:
            chunk = q.get()
            if not recognizer.AcceptWaveform(chunk):
                continue
            result = json.loads(recognizer.Result())
            callback(result["text"])

35
tts.py Normal file
View File

@@ -0,0 +1,35 @@
import torch
import sounddevice as sd
import time
# Settings for the Silero text-to-speech model loaded below.
language = 'ru'
model_id = 'ru_v3'
# Sample rate requested from apply_tts in va_speak.
sample_rate = 48000 # 48000
speaker = 'aidar' # aidar, baya, kseniya, xenia, random
# Passed straight through to apply_tts in va_speak.
put_accent = True
put_yo = True
device = torch.device('cpu') # cpu or gpu
# Sample phrase; not referenced by the rest of the code shown here.
text = "Хауди Хо, друзья!!!"
# Load the TTS model through torch.hub (the second returned value is
# discarded), then move it to the selected device.
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
model='silero_tts',
language=language,
speaker=model_id)
model.to(device)
# Playback
def va_speak(what: str):
    """Synthesize `what` with the loaded TTS model and play it.

    ".." is appended to the text before synthesis. The audio is played at
    1.05 times the synthesis sample rate, and the call blocks for the audio's
    nominal duration plus half a second before playback is stopped.

    Args:
        what: the text to speak.
    """
    audio = model.apply_tts(
        text=what + "..",
        speaker=speaker,
        sample_rate=sample_rate,
        put_accent=put_accent,
        put_yo=put_yo,
    )

    playback_rate = sample_rate * 1.05
    wait_seconds = (len(audio) / sample_rate) + 0.5

    sd.play(audio, playback_rate)
    time.sleep(wait_seconds)
    sd.stop()