# mycroft-core/mycroft/tts/cache_handler.py

# Copyright 2019 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Cache handler - reads all the .dialog files (the default
Mycroft responses) and runs TTS inference on each sentence.
It then saves the resulting .wav files on the Mark 1 device.

"""

import base64
import glob
import os
import re
import shutil
import hashlib
import json
import mycroft.util as util
from urllib import parse
from requests_futures.sessions import FuturesSession
from mycroft.util.log import LOG


# Sentences containing any of these characters are left out of the cache
REGEX_SPL_CHARS = re.compile(r'[@#$%^*()<>/\|}{~:]')
# Synthesis endpoint; the URL-quoted sentence is appended to it
MIMIC2_URL = 'https://mimic-api.mycroft.ai/synthesize?text='

# For now only the mimic2-kusal cache is fetched
TTS = 'Mimic2'

# Directories searched for default *.dialog files:
# the 'res/text/en-us' folder two levels above this file, and the
# dialogs shipped with the wifi setup
res_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'res', 'text', 'en-us')
wifi_setup_path = '/usr/local/mycroft/mycroft-wifi-setup/dialog/en-us'
cache_dialog_path = [
    res_path,
    wifi_setup_path,
]


def generate_cache_text(cache_audio_dir, cache_text_file):
    """
    This prepares a text file with all the sentences
    from *.dialog files present in the directories listed in
    cache_dialog_path (mycroft/res/text/en-us and the
    mycroft-wifi setup dialogs).

    Nothing is written when cache_text_file already exists. Failures
    are logged instead of raised.

    Args:
        cache_audio_dir (path): path to store .wav files
        cache_text_file (file): file containing the sentences
    """
    try:
        if os.path.isfile(cache_text_file):
            LOG.debug("Cache file 'cache_text.txt' already exists")
            return

        # The audio directory may already exist (e.g. the text file was
        # removed on its own), which must not prevent regeneration
        os.makedirs(cache_audio_dir, exist_ok=True)
        # The context manager closes the file even when a write fails
        with open(cache_text_file, 'w') as f:
            for each_path in cache_dialog_path:
                if os.path.exists(each_path):
                    write_cache_text(each_path, f)
        LOG.debug("Completed generating cache")
    except Exception as e:
        LOG.error("Could not open text file to write cache "
                  "due to ({})".format(repr(e)))


def write_cache_text(cache_path, f):
    """
    Write the cacheable sentences of every *.dialog file found
    directly inside cache_path to f, one sentence per line.

    Every dialog line is split into sentences. Empty sentences and
    sentences containing a character matched by REGEX_SPL_CHARS are
    left out. A dialog file that cannot be processed is logged and
    skipped.

    Args:
        cache_path (path): directory containing the .dialog files
        f (file): open, writable text file receiving the sentences
    """
    # Split on whitespace that follows '.', ';' or '?', unless the
    # preceding text looks like an abbreviation (such as "e.g." or "Mr.")
    sentence_boundary = re.compile(
        r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s')

    # Sorted so the generated text file has a reproducible order
    for dialog_file in sorted(glob.glob(cache_path + "/*.dialog")):
        try:
            with open(dialog_file, 'r') as fp:
                all_dialogs = fp.readlines()
            for each_dialog in all_dialogs:
                for each in sentence_boundary.split(each_dialog.strip()):
                    sentence = each.strip()
                    if not sentence:
                        # Blank dialog lines would otherwise end up as
                        # empty sentences in the cache text
                        continue
                    if REGEX_SPL_CHARS.search(sentence) is None:
                        # Do not consider sentences with special
                        # characters other than any punctuation
                        # ex : <<< LOADING <<<
                        # should not be considered
                        f.write(sentence + '\n')
        except Exception as e:
            # Best effort: one bad dialog file must not stop the others
            LOG.debug("Dialog Skipped: {} ({})".format(dialog_file, repr(e)))


def download_audio(cache_audio_dir, cache_text_file):
    """
    This method takes the sentences from the text file generated
    using generate_cache_text() and performs TTS inference on
    mimic2-api. The wav files and phonemes are stored in
    'cache_audio_dir'

    Nothing is downloaded when 'cache_audio_dir' already contains
    entries. Blank and repeated sentences are ignored. A sentence
    whose request or decoding fails is logged and skipped.

    Args:
        cache_audio_dir (path): path to store .wav files
        cache_text_file (file): file containing the sentences
    """
    if not (os.path.isfile(cache_text_file) and
            os.path.exists(cache_audio_dir)):
        missing_path = cache_text_file if not \
            os.path.isfile(cache_text_file) else cache_audio_dir
        LOG.error("Path ({}) does not exist for getting the cache"
                  .format(missing_path))
        return

    if os.listdir(cache_audio_dir):
        LOG.debug("Pre-loaded cache for {} already exists".
                  format(TTS))
        return

    with open(cache_text_file, 'r') as fp:
        sentences = [line.strip() for line in fp]

    seen_keys = set()
    # The session is closed once every sentence has been handled
    with FuturesSession() as session:
        for sentence in sentences:
            if not sentence:
                # Nothing to synthesize for a blank line
                continue
            # File names are the md5 of the sentence text
            key = hashlib.md5(
                sentence.encode('utf-8', 'ignore')).hexdigest()
            if key in seen_keys:
                # Same sentence again: it would only rewrite the same files
                continue
            seen_keys.add(key)

            wav_file = os.path.join(cache_audio_dir, key + '.wav')
            mimic2_url = MIMIC2_URL + parse.quote(sentence) + \
                '&visimes=True'
            try:
                req = session.get(mimic2_url)
                results = req.result().json()
                audio = base64.b64decode(results['audio_base64'])
                vis = results['visimes']
                if audio:
                    with open(wav_file, 'wb') as audiofile:
                        audiofile.write(audio)
                if vis:
                    pho_file = os.path.join(cache_audio_dir,
                                            key + ".pho")
                    with open(pho_file, "w") as cachefile:
                        cachefile.write(json.dumps(vis))  # Mimic2
            except Exception as e:
                # Skip this dialog and continue
                LOG.error("Unable to get pre-loaded cache "
                          "due to ({})".format(repr(e)))

    LOG.debug("Completed getting cache for {}".format(TTS))


def copy_cache(cache_audio_dir):
    """
    This method copies the cache from 'cache_audio_dir'
    to TTS specific cache directory given by
    get_cache_directory()

    Only regular files are copied; other directory entries are
    ignored. If 'cache_audio_dir' is not a directory, this is logged
    and nothing is copied.

    Args:
        cache_audio_dir (path): path containing .wav files
    """
    if not os.path.isdir(cache_audio_dir):
        LOG.debug("No Source directory for {} pre-loaded cache"
                  .format(TTS))
        return

    # get tmp directory where tts cache is stored
    dest = util.get_cache_directory('tts/' + TTS)
    for name in os.listdir(cache_audio_dir):
        src = os.path.join(cache_audio_dir, name)
        # shutil.copy2 raises on a directory, which would abort the
        # whole copy; cache entries are plain files
        if os.path.isfile(src):
            shutil.copy2(src, dest)
    LOG.debug("Copied all pre-loaded cache for {} to {}"
              .format(TTS, dest))


# Start here
def main(cache_audio_dir):
    """
    Build the pre-loaded cache: generate the sentence list, download
    the audio for it and copy the result to the TTS cache directory.

    Does nothing when 'cache_audio_dir' is empty or None.

    Args:
        cache_audio_dir (path): path where cache is stored and not
                                cleared on reboot/TTS change
    """
    if not cache_audio_dir:
        return

    # The sentence list is kept next to (one level above) the audio folder
    sentences_file = os.path.join(cache_audio_dir, '..', 'cache_text.txt')
    generate_cache_text(cache_audio_dir, sentences_file)
    download_audio(cache_audio_dir, sentences_file)
    copy_cache(cache_audio_dir)