Change wake word upload to https request
Also strips out old alternative ways to enable wake word upload. The logic for uploading wake words is now: - Only if opt-in is enabled - With an additional "disable" setting to selectively prevent it
pull/1629/head
parent
321807b1f1
commit
727e1c787c
|
@ -13,26 +13,23 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
import audioop
|
import audioop
|
||||||
import collections
|
|
||||||
import datetime
|
|
||||||
from hashlib import md5
|
|
||||||
import shutil
|
|
||||||
from tempfile import gettempdir
|
|
||||||
from threading import Thread, Lock
|
|
||||||
from time import sleep, time as get_time
|
from time import sleep, time as get_time
|
||||||
|
|
||||||
|
import collections
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import pyaudio
|
import pyaudio
|
||||||
|
import requests
|
||||||
import speech_recognition
|
import speech_recognition
|
||||||
from os import mkdir
|
from hashlib import md5
|
||||||
from os.path import isdir, join, expanduser, isfile
|
from io import BytesIO, StringIO
|
||||||
from speech_recognition import (
|
from speech_recognition import (
|
||||||
Microphone,
|
Microphone,
|
||||||
AudioSource,
|
AudioSource,
|
||||||
AudioData
|
AudioData
|
||||||
)
|
)
|
||||||
import requests
|
from threading import Thread, Lock
|
||||||
from subprocess import check_output, Popen, PIPE
|
|
||||||
|
|
||||||
from mycroft.api import DeviceApi
|
from mycroft.api import DeviceApi
|
||||||
from mycroft.configuration import Configuration
|
from mycroft.configuration import Configuration
|
||||||
|
@ -178,7 +175,8 @@ class ResponsiveRecognizer(speech_recognition.Recognizer):
|
||||||
|
|
||||||
self.config = Configuration.get()
|
self.config = Configuration.get()
|
||||||
listener_config = self.config.get('listener')
|
listener_config = self.config.get('listener')
|
||||||
self.upload_config = listener_config.get('wake_word_upload')
|
self.upload_url = listener_config['wake_word_upload']['url']
|
||||||
|
self.upload_disabled = listener_config['wake_word_upload']['disable']
|
||||||
self.wake_word_name = wake_word_recognizer.key_phrase
|
self.wake_word_name = wake_word_recognizer.key_phrase
|
||||||
|
|
||||||
self.overflow_exc = listener_config.get('overflow_exception', False)
|
self.overflow_exc = listener_config.get('overflow_exception', False)
|
||||||
|
@ -191,10 +189,7 @@ class ResponsiveRecognizer(speech_recognition.Recognizer):
|
||||||
# check the config for the flag to save wake words.
|
# check the config for the flag to save wake words.
|
||||||
|
|
||||||
self.save_utterances = listener_config.get('record_utterances', False)
|
self.save_utterances = listener_config.get('record_utterances', False)
|
||||||
self.save_wake_words = listener_config.get('record_wake_words') \
|
|
||||||
or self.upload_config['enable'] or self.config['opt_in']
|
|
||||||
self.upload_lock = Lock()
|
self.upload_lock = Lock()
|
||||||
self.save_wake_words_dir = join(gettempdir(), 'mycroft_wake_words')
|
|
||||||
self.filenames_to_upload = []
|
self.filenames_to_upload = []
|
||||||
self.mic_level_file = os.path.join(get_ipc_directory(), "mic_level")
|
self.mic_level_file = os.path.join(get_ipc_directory(), "mic_level")
|
||||||
self._stop_signaled = False
|
self._stop_signaled = False
|
||||||
|
@ -204,7 +199,7 @@ class ResponsiveRecognizer(speech_recognition.Recognizer):
|
||||||
num_phonemes = wake_word_recognizer.num_phonemes
|
num_phonemes = wake_word_recognizer.num_phonemes
|
||||||
len_phoneme = listener_config.get('phoneme_duration', 120) / 1000.0
|
len_phoneme = listener_config.get('phoneme_duration', 120) / 1000.0
|
||||||
self.TEST_WW_SEC = num_phonemes * len_phoneme
|
self.TEST_WW_SEC = num_phonemes * len_phoneme
|
||||||
self.SAVED_WW_SEC = 3 if self.save_wake_words else self.TEST_WW_SEC
|
self.SAVED_WW_SEC = max(3, self.TEST_WW_SEC)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.account_id = DeviceApi().get()['user']['uuid']
|
self.account_id = DeviceApi().get()['user']['uuid']
|
||||||
|
@ -337,37 +332,30 @@ class ResponsiveRecognizer(speech_recognition.Recognizer):
|
||||||
"""
|
"""
|
||||||
self._stop_signaled = True
|
self._stop_signaled = True
|
||||||
|
|
||||||
def _upload_file(self, filename):
|
def _upload_wake_word(self, audio):
|
||||||
server = self.upload_config['server']
|
ww_module = self.wake_word_recognizer.__class__.__name__
|
||||||
keyfile = resolve_resource_file('wakeword_rsa')
|
if ww_module == 'PreciseHotword':
|
||||||
userfile = expanduser('~/.mycroft/wakeword_rsa')
|
_, model_path = self.wake_word_recognizer.get_model_info()
|
||||||
|
with open(model_path, 'rb') as f:
|
||||||
|
model_hash = md5(f.read()).hexdigest()
|
||||||
|
else:
|
||||||
|
model_hash = '0'
|
||||||
|
|
||||||
if not isfile(userfile):
|
metadata = {
|
||||||
shutil.copy2(keyfile, userfile)
|
'name': self.wake_word_name.replace(' ', '-'),
|
||||||
os.chmod(userfile, 0o600)
|
'engine': md5(ww_module.encode('utf-8')).hexdigest(),
|
||||||
keyfile = userfile
|
'time': str(int(1000 * get_time())),
|
||||||
|
'sessionId': SessionManager.get().session_id,
|
||||||
address = '{}@{}:{}'.format(
|
'accountId': self.account_id,
|
||||||
self.upload_config['user'], server, self.upload_config['folder']
|
'model': str(model_hash)
|
||||||
|
}
|
||||||
|
requests.post(
|
||||||
|
self.upload_url, files={
|
||||||
|
'audio': BytesIO(audio.get_wav_data()),
|
||||||
|
'metadata': StringIO(json.dumps(metadata))
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.upload_lock:
|
|
||||||
self.filenames_to_upload.append(filename)
|
|
||||||
for i, fn in enumerate(self.filenames_to_upload):
|
|
||||||
LOG.debug('Uploading wake word...')
|
|
||||||
os.chmod(fn, 0o666)
|
|
||||||
scp_status = Popen([
|
|
||||||
'scp', '-o', 'StrictHostKeyChecking=no', '-P',
|
|
||||||
str(self.upload_config['port']), '-i', keyfile,
|
|
||||||
fn, address
|
|
||||||
], stdout=PIPE, stderr=PIPE).wait()
|
|
||||||
if scp_status == 0:
|
|
||||||
del self.filenames_to_upload[i]
|
|
||||||
os.remove(fn)
|
|
||||||
else:
|
|
||||||
LOG.debug('Failed to upload wake word to metrics server')
|
|
||||||
break
|
|
||||||
|
|
||||||
def _wait_until_wake_word(self, source, sec_per_buffer):
|
def _wait_until_wake_word(self, source, sec_per_buffer):
|
||||||
"""Listen continuously on source until a wake word is spoken
|
"""Listen continuously on source until a wake word is spoken
|
||||||
|
|
||||||
|
@ -399,13 +387,6 @@ class ResponsiveRecognizer(speech_recognition.Recognizer):
|
||||||
idx_energy = 0
|
idx_energy = 0
|
||||||
avg_energy = 0.0
|
avg_energy = 0.0
|
||||||
energy_avg_samples = int(5 / sec_per_buffer) # avg over last 5 secs
|
energy_avg_samples = int(5 / sec_per_buffer) # avg over last 5 secs
|
||||||
|
|
||||||
ww_module = self.wake_word_recognizer.__class__.__name__
|
|
||||||
if ww_module == 'PreciseHotword':
|
|
||||||
_, model_path = self.wake_word_recognizer.get_model_info()
|
|
||||||
model_hash = check_output(['md5sum', model_path]).split()[0]
|
|
||||||
else:
|
|
||||||
model_hash = '0'
|
|
||||||
counter = 0
|
counter = 0
|
||||||
|
|
||||||
while not said_wake_word and not self._stop_signaled:
|
while not said_wake_word and not self._stop_signaled:
|
||||||
|
@ -460,31 +441,13 @@ class ResponsiveRecognizer(speech_recognition.Recognizer):
|
||||||
audio_data = chopped + silence
|
audio_data = chopped + silence
|
||||||
said_wake_word = \
|
said_wake_word = \
|
||||||
self.wake_word_recognizer.found_wake_word(audio_data)
|
self.wake_word_recognizer.found_wake_word(audio_data)
|
||||||
# if a wake word is success full then record audio in temp
|
# if a wake word is success full then upload wake word
|
||||||
# file.
|
if said_wake_word and self.config['opt_in'] and not \
|
||||||
if self.save_wake_words and said_wake_word:
|
self.upload_disabled:
|
||||||
audio = self._create_audio_data(byte_data, source)
|
Thread(
|
||||||
|
target=self._upload_wake_word, daemon=True,
|
||||||
if not isdir(self.save_wake_words_dir):
|
args=[self._create_audio_data(byte_data, source)]
|
||||||
mkdir(self.save_wake_words_dir)
|
).start()
|
||||||
dr = self.save_wake_words_dir
|
|
||||||
|
|
||||||
components = [
|
|
||||||
self.wake_word_name.replace(' ', '-'),
|
|
||||||
md5(ww_module.encode('utf-8')).hexdigest(),
|
|
||||||
str(int(1000 * get_time())),
|
|
||||||
SessionManager.get().session_id,
|
|
||||||
self.account_id,
|
|
||||||
str(model_hash)
|
|
||||||
]
|
|
||||||
fn = join(dr, '.'.join(components) + '.wav')
|
|
||||||
with open(fn, 'wb') as f:
|
|
||||||
f.write(audio.get_wav_data())
|
|
||||||
|
|
||||||
if self.upload_config['enable'] or self.config['opt_in']:
|
|
||||||
t = Thread(target=self._upload_file, args=(fn,))
|
|
||||||
t.daemon = True
|
|
||||||
t.start()
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _create_audio_data(raw_data, source):
|
def _create_audio_data(raw_data, source):
|
||||||
|
|
|
@ -138,11 +138,8 @@
|
||||||
"record_wake_words": false,
|
"record_wake_words": false,
|
||||||
"record_utterances": false,
|
"record_utterances": false,
|
||||||
"wake_word_upload": {
|
"wake_word_upload": {
|
||||||
"enable": false,
|
"disable": false,
|
||||||
"server": "mycroft.wickedbroadband.com",
|
"url": "https://training.mycroft.ai/precise/upload"
|
||||||
"port": 1776,
|
|
||||||
"user": "precise",
|
|
||||||
"folder": "/home/precise/wakewords"
|
|
||||||
},
|
},
|
||||||
// In milliseconds
|
// In milliseconds
|
||||||
"phoneme_duration": 120,
|
"phoneme_duration": 120,
|
||||||
|
|
Loading…
Reference in New Issue