diff --git a/mycroft/tts/__init__.py b/mycroft/tts/__init__.py index 0f5020ea0f..aad0507f08 100644 --- a/mycroft/tts/__init__.py +++ b/mycroft/tts/__init__.py @@ -275,6 +275,20 @@ class TTS: # return text with supported ssml tags only return utterance.replace(" ", " ") + def _preprocess_sentence(self, sentence): + """ Default preprocessing is no preprocessing. + + This method can be overridden to create chunks suitable to the + TTS engine in question. + + Arguments: + sentence (str): sentence to preprocess + + Returns: + list: list of sentence parts + """ + return [sentence] + def execute(self, sentence, ident=None): """ Convert sentence to speech, preprocessing out unsupported ssml @@ -295,20 +309,23 @@ class TTS: sentence = sentence.replace(word, self.spellings[word.lower()]) - key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest()) - wav_file = os.path.join(mycroft.util.get_cache_directory("tts"), - key + '.' + self.audio_ext) + chunks = self._preprocess_sentence(sentence) + for sentence in chunks: + key = str(hashlib.md5( + sentence.encode('utf-8', 'ignore')).hexdigest()) + wav_file = os.path.join(mycroft.util.get_cache_directory("tts"), + key + '.' + self.audio_ext) - if os.path.exists(wav_file): - LOG.debug("TTS cache hit") - phonemes = self.load_phonemes(key) - else: - wav_file, phonemes = self.get_tts(sentence, wav_file) - if phonemes: - self.save_phonemes(key, phonemes) + if os.path.exists(wav_file): + LOG.debug("TTS cache hit") + phonemes = self.load_phonemes(key) + else: + wav_file, phonemes = self.get_tts(sentence, wav_file) + if phonemes: + self.save_phonemes(key, phonemes) - vis = self.viseme(phonemes) - self.queue.put((self.audio_ext, wav_file, vis, ident)) + vis = self.viseme(phonemes) + self.queue.put((self.audio_ext, wav_file, vis, ident)) def viseme(self, phonemes): """ diff --git a/mycroft/tts/mimic2_tts.py b/mycroft/tts/mimic2_tts.py index b1b454c20c..97cbe83500 100644 --- a/mycroft/tts/mimic2_tts.py +++ b/mycroft/tts/mimic2_tts.py @@ -89,13 +89,17 @@ def _split_by_punctuation(chunks, puncs): e.g. hello, world => [hello, world] Args: - chunks (list): text (str) to split + chunks (list or str): text (str) to split puncs (list): list of punctuations used to split text Returns: list: list with split text """ - out = chunks + if isinstance(chunks, str): + out = [chunks] + else: + out = chunks + for punc in puncs: splits = [] for t in out: @@ -138,7 +142,7 @@ def _sentence_chunker(text): # first split by punctuations that are major pauses first_splits = _split_by_punctuation( - [text], + text, puncs=[r'\.', r'\!', r'\?', r'\:', r'\;'] ) @@ -149,7 +153,7 @@ def _sentence_chunker(text): second_splits += _split_by_punctuation(chunk, puncs=[r'\,', '--', '-']) else: - second_splits += chunk + second_splits.append(chunk) # if chunks are still too big, chop into pieces of at most 20 words third_splits = [] @@ -157,7 +161,7 @@ def _sentence_chunker(text): if len(chunk) > _max_sentence_size: third_splits += _split_by_chunk_size(chunk, 20) else: - third_splits += chunk + third_splits.append(chunk) return [_add_punctuation(chunk) for chunk in third_splits] @@ -194,7 +198,7 @@ class Mimic2(TTS): '%s Http Error: %s for url: %s' % (req.status_code, req.reason, req.url)) - def _requests(self, chunks): + def _requests(self, sentence): """create asynchronous request list Args: @@ -203,13 +207,9 @@ class Mimic2(TTS): Returns: list: list of FutureSession objects """ - reqs = [] - for chunk in chunks: - if len(chunk) > 0: - url = self.url + parse.quote(chunk) - req_route = url + "&visimes=True" - reqs.append(self.session.get(req_route, timeout=5)) - return reqs + url = self.url + parse.quote(sentence) + req_route = url + "&visimes=True" + return self.session.get(req_route, timeout=5) def viseme(self, phonemes): """ Maps phonemes to appropriate viseme encoding @@ -234,6 +234,10 @@ class Mimic2(TTS): visemes.append((vis, vis_dur)) return visemes + def _prepocess_sentence(sentence): + """ Split sentence in chunks better suited for mimic2. """ + return _sentence_chunker(sentence) + def get_tts(self, sentence, wav_file): """ Generate (remotely) and play mimic2 WAV audio @@ -241,23 +245,14 @@ class Mimic2(TTS): sentence (str): Phrase to synthesize to audio with mimic2 wav_file (str): Location to write audio output """ - - # Use the phonetic_spelling mechanism from the TTS base class - if self.phonetic_spelling: - for word in re.findall(r"[\w']+", sentence): - if word.lower() in self.spellings: - sentence = sentence.replace(word, - self.spellings[word.lower()]) - - chunks = _sentence_chunker(sentence) - LOG.debug("Generating Mimic2 TSS for: "+str(chunks)) + LOG.debug("Generating Mimic2 TSS for: " + str(sentence)) try: - for _, req in enumerate(self._requests(chunks)): - results = req.result().json() - audio = base64.b64decode(results['audio_base64']) - vis = results['visimes'] - with open(wav_file, 'wb') as f: - f.write(audio) + req = self._requests(sentence) + results = req.result().json() + audio = base64.b64decode(results['audio_base64']) + vis = results['visimes'] + with open(wav_file, 'wb') as f: + f.write(audio) except (ReadTimeout, ConnectionError, ConnectTimeout, HTTPError): raise RemoteTTSTimeoutException( "Mimic 2 server request timed out. Falling back to mimic")