180 lines
6.8 KiB
Python
180 lines
6.8 KiB
Python
# Copyright 2019 Mycroft AI Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Cache handler - reads all the .dialog files (the default
Mycroft responses) and runs TTS inference on each sentence.
It then saves the resulting .wav files to the Mark 1 device.
|
|
|
|
"""
|
|
|
|
import base64
|
|
import glob
|
|
import os
|
|
import re
|
|
import shutil
|
|
import hashlib
|
|
import json
|
|
import mycroft.util as util
|
|
from urllib import parse
|
|
from requests_futures.sessions import FuturesSession
|
|
from mycroft.util.log import LOG
|
|
|
|
|
|
# Sentences containing any of these characters are left out of the cache.
REGEX_SPL_CHARS = re.compile(r'[@#$%^*()<>/\|}{~:]')

# Endpoint queried for synthesis; the URL-quoted sentence is appended to it.
MIMIC2_URL = 'https://mimic-api.mycroft.ai/synthesize?text='

# For now we only get the cache for mimic2-kusal
TTS = 'Mimic2'

# Directories searched for default *.dialog files.
# res_path resolves to <parent of this file's directory>/res/text/en-us.
res_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'res', 'text', 'en-us')
wifi_setup_path = '/usr/local/mycroft/mycroft-wifi-setup/dialog/en-us'
cache_dialog_path = [res_path, wifi_setup_path]
|
|
|
|
|
|
def generate_cache_text(cache_audio_dir, cache_text_file):
    """
    This prepares a text file with all the sentences
    from *.dialog files found in each existing directory listed in
    cache_dialog_path (mycroft/res/text/en-us and the mycroft-wifi
    setup skill). Nothing is written when the text file already exists.
    Any failure is logged and not propagated to the caller.
    Args:
        cache_audio_dir (path): path to store .wav files, created if missing
        cache_text_file (file): file containing the sentences
    """
    try:
        if os.path.isfile(cache_text_file):
            LOG.debug("Cache file 'cache_text.txt' already exists")
            return

        # The audio directory may already exist while the text file is
        # missing; that must not abort regenerating the text file.
        os.makedirs(cache_audio_dir, exist_ok=True)

        # The context manager closes the file even if a write fails.
        with open(cache_text_file, 'w') as f:
            for each_path in cache_dialog_path:
                if os.path.exists(each_path):
                    write_cache_text(each_path, f)
        LOG.debug("Completed generating cache")
    except Exception as e:
        # Best effort: a missing pre-loaded cache must not stop the caller.
        LOG.error("Could not open text file to write cache "
                  "({})".format(repr(e)))
|
|
|
|
|
def write_cache_text(cache_path, f):
    """
    Writes the cacheable sentences of every *.dialog file found
    directly inside 'cache_path' to the open file 'f', one per line.
    Each dialog line is split into sentences; empty sentences and
    sentences matching REGEX_SPL_CHARS are left out.
    A dialog file that cannot be processed is logged and skipped.
    Args:
        cache_path (path): directory searched for *.dialog files
        f (file): writable text file object receiving the sentences
    """
    for dialog_file in glob.glob(cache_path + "/*.dialog"):
        try:
            # Read the whole file first so a read error leaves no partial
            # output for this dialog file.
            with open(dialog_file, 'r') as fp:
                all_dialogs = fp.readlines()

            for each_dialog in all_dialogs:
                # split the sentences
                sentences = re.split(
                    r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
                    each_dialog.strip())
                for sentence in sentences:
                    sentence = sentence.strip()
                    if not sentence:
                        # Blank dialog lines would otherwise end up as
                        # empty entries in the cache text file.
                        continue
                    if REGEX_SPL_CHARS.search(sentence) is None:
                        # Do not consider sentences with special
                        # characters other than any punctuation
                        # ex : <<< LOADING <<<
                        # should not be considered
                        f.write(sentence + '\n')
        except Exception as e:
            # Best effort: skip this dialog file but leave a trace of why.
            LOG.debug("Dialog Skipped ({}) due to ({})".format(
                dialog_file, repr(e)))
|
|
|
|
|
|
def download_audio(cache_audio_dir, cache_text_file):
    """
    This method takes the sentences from the text file generated
    using generate_cache_text() and performs TTS inference on
    mimic2-api. The wav files and phonemes are stored in
    'cache_audio_dir', named after the md5 hex digest of the sentence
    ('<md5>.wav' and '<md5>.pho').
    Nothing is downloaded when 'cache_audio_dir' already contains
    entries. A sentence whose request or decoding fails is logged and
    skipped.
    Args:
        cache_audio_dir (path): path to store .wav files
        cache_text_file (file): file containing the sentences
    """
    # Report the first missing input: the text file, then the directory.
    if not os.path.isfile(cache_text_file):
        LOG.error("Path ({}) does not exist for getting the cache"
                  .format(cache_text_file))
        return
    if not os.path.exists(cache_audio_dir):
        LOG.error("Path ({}) does not exist for getting the cache"
                  .format(cache_audio_dir))
        return

    if os.listdir(cache_audio_dir):
        LOG.debug("Pre-loaded cache for {} already exists".
                  format(TTS))
        return

    with open(cache_text_file, 'r') as fp:
        all_dialogs = [line.strip() for line in fp]

    # Using the session as a context manager releases its resources once
    # every sentence has been handled.
    with FuturesSession() as session:
        for each_dialog in all_dialogs:
            if not each_dialog:
                # An empty sentence has nothing to synthesize.
                continue

            key = hashlib.md5(
                each_dialog.encode('utf-8', 'ignore')).hexdigest()
            wav_file = os.path.join(cache_audio_dir, key + '.wav')
            mimic2_url = (MIMIC2_URL + parse.quote(each_dialog) +
                          '&visimes=True')
            try:
                req = session.get(mimic2_url)
                results = req.result().json()
                audio = base64.b64decode(results['audio_base64'])
                vis = results['visimes']
                if audio:
                    with open(wav_file, 'wb') as audiofile:
                        audiofile.write(audio)
                if vis:
                    pho_file = os.path.join(cache_audio_dir,
                                            key + ".pho")
                    with open(pho_file, "w") as cachefile:
                        cachefile.write(json.dumps(vis))  # Mimic2
            except Exception as e:
                # Skip this dialog and continue
                LOG.error("Unable to get pre-loaded cache "
                          "due to ({})".format(repr(e)))

    LOG.debug("Completed getting cache for {}".format(TTS))
|
|
|
|
|
|
def copy_cache(cache_audio_dir):
    """
    Copies every entry of 'cache_audio_dir' into the directory
    returned by util.get_cache_directory('tts/Mimic2').
    Only a debug message is logged when 'cache_audio_dir' does not
    exist.
    Args:
        cache_audio_dir (path): path containing .wav files
    """
    if not os.path.exists(cache_audio_dir):
        LOG.debug("No Source directory for {} pre-loaded cache"
                  .format(TTS))
        return

    # get tmp directory where tts cache is stored
    dest = util.get_cache_directory('tts/' + 'Mimic2')
    for entry in os.listdir(cache_audio_dir):
        source = os.path.join(cache_audio_dir, entry)
        shutil.copy2(source, dest)
    LOG.debug("Copied all pre-loaded cache for {} to {}"
              .format(TTS, dest))
|
|
|
|
|
|
# Start here
|
|
def main(cache_audio_dir):
    """
    Runs the pre-loaded cache pipeline: generates the sentence text
    file, downloads the audio for it and copies the result to the TTS
    cache directory. Does nothing when 'cache_audio_dir' is empty or
    None.
    Args:
        cache_audio_dir (path): path where cache is stored and not
                                cleared on reboot/TTS change
    """
    if not cache_audio_dir:
        return

    # The sentence list lives next to (one level above) the audio directory
    text_file = os.path.join(cache_audio_dir, '..', 'cache_text.txt')
    generate_cache_text(cache_audio_dir, text_file)
    download_audio(cache_audio_dir, text_file)
    copy_cache(cache_audio_dir)
|