Fix mimic 2 long sentences (#2061)

* Fix mimic 2 long sentences

Fixes a bug where the second and third chunking passes incorrectly
concatenated strings with lists, resulting in chunks of single
characters.

* Handle mimic2 chunking correctly

- Move preprocessing from get_tts() to a method called from tts execute,
    this allows all parts to be spoken and the caching to work correctly
- Remove duplicate of phonetic spelling in mimic2_tts
pull/2063/head
Åke 2019-03-22 16:20:06 +01:00 committed by Ruthvicp
parent dfa714c56d
commit 9eeb8cefc3
2 changed files with 53 additions and 41 deletions

View File

@ -275,6 +275,20 @@ class TTS:
# return text with supported ssml tags only
return utterance.replace(" ", " ")
def _preprocess_sentence(self, sentence):
""" Default preprocessing is no preprocessing.
This method can be overridden to create chunks suitable to the
TTS engine in question.
Arguments:
sentence (str): sentence to preprocess
Returns:
list: list of sentence parts
"""
return [sentence]
def execute(self, sentence, ident=None):
"""
Convert sentence to speech, preprocessing out unsupported ssml
@ -295,20 +309,23 @@ class TTS:
sentence = sentence.replace(word,
self.spellings[word.lower()])
key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest())
wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
key + '.' + self.audio_ext)
chunks = self._preprocess_sentence(sentence)
for sentence in chunks:
key = str(hashlib.md5(
sentence.encode('utf-8', 'ignore')).hexdigest())
wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
key + '.' + self.audio_ext)
if os.path.exists(wav_file):
LOG.debug("TTS cache hit")
phonemes = self.load_phonemes(key)
else:
wav_file, phonemes = self.get_tts(sentence, wav_file)
if phonemes:
self.save_phonemes(key, phonemes)
if os.path.exists(wav_file):
LOG.debug("TTS cache hit")
phonemes = self.load_phonemes(key)
else:
wav_file, phonemes = self.get_tts(sentence, wav_file)
if phonemes:
self.save_phonemes(key, phonemes)
vis = self.viseme(phonemes)
self.queue.put((self.audio_ext, wav_file, vis, ident))
vis = self.viseme(phonemes)
self.queue.put((self.audio_ext, wav_file, vis, ident))
def viseme(self, phonemes):
"""

View File

@ -89,13 +89,17 @@ def _split_by_punctuation(chunks, puncs):
e.g. hello, world => [hello, world]
Args:
chunks (list): text (str) to split
chunks (list or str): text (str) to split
puncs (list): list of punctuations used to split text
Returns:
list: list with split text
"""
out = chunks
if isinstance(chunks, str):
out = [chunks]
else:
out = chunks
for punc in puncs:
splits = []
for t in out:
@ -138,7 +142,7 @@ def _sentence_chunker(text):
# first split by punctuations that are major pauses
first_splits = _split_by_punctuation(
[text],
text,
puncs=[r'\.', r'\!', r'\?', r'\:', r'\;']
)
@ -149,7 +153,7 @@ def _sentence_chunker(text):
second_splits += _split_by_punctuation(chunk,
puncs=[r'\,', '--', '-'])
else:
second_splits += chunk
second_splits.append(chunk)
# if chunks are still too big, chop into pieces of at most 20 words
third_splits = []
@ -157,7 +161,7 @@ def _sentence_chunker(text):
if len(chunk) > _max_sentence_size:
third_splits += _split_by_chunk_size(chunk, 20)
else:
third_splits += chunk
third_splits.append(chunk)
return [_add_punctuation(chunk) for chunk in third_splits]
@ -194,7 +198,7 @@ class Mimic2(TTS):
'%s Http Error: %s for url: %s' %
(req.status_code, req.reason, req.url))
def _requests(self, chunks):
def _requests(self, sentence):
"""create asynchronous request list
Args:
@ -203,13 +207,9 @@ class Mimic2(TTS):
Returns:
list: list of FutureSession objects
"""
reqs = []
for chunk in chunks:
if len(chunk) > 0:
url = self.url + parse.quote(chunk)
req_route = url + "&visimes=True"
reqs.append(self.session.get(req_route, timeout=5))
return reqs
url = self.url + parse.quote(sentence)
req_route = url + "&visimes=True"
return self.session.get(req_route, timeout=5)
def viseme(self, phonemes):
""" Maps phonemes to appropriate viseme encoding
@ -234,6 +234,10 @@ class Mimic2(TTS):
visemes.append((vis, vis_dur))
return visemes
def _preprocess_sentence(self, sentence):
    """Split a sentence into chunks better suited for mimic2.

    Overrides the preprocessing hook that the TTS ``execute`` method
    invokes as ``self._preprocess_sentence(sentence)``. The name must
    match that call exactly and the method must accept ``self``;
    otherwise the base-class default is used and the sentence is never
    chunked.

    Arguments:
        sentence (str): sentence to preprocess

    Returns:
        list: list of sentence parts produced by ``_sentence_chunker``
    """
    return _sentence_chunker(sentence)
def get_tts(self, sentence, wav_file):
""" Generate (remotely) and play mimic2 WAV audio
@ -241,23 +245,14 @@ class Mimic2(TTS):
sentence (str): Phrase to synthesize to audio with mimic2
wav_file (str): Location to write audio output
"""
# Use the phonetic_spelling mechanism from the TTS base class
if self.phonetic_spelling:
for word in re.findall(r"[\w']+", sentence):
if word.lower() in self.spellings:
sentence = sentence.replace(word,
self.spellings[word.lower()])
chunks = _sentence_chunker(sentence)
LOG.debug("Generating Mimic2 TSS for: "+str(chunks))
LOG.debug("Generating Mimic2 TSS for: " + str(sentence))
try:
for _, req in enumerate(self._requests(chunks)):
results = req.result().json()
audio = base64.b64decode(results['audio_base64'])
vis = results['visimes']
with open(wav_file, 'wb') as f:
f.write(audio)
req = self._requests(sentence)
results = req.result().json()
audio = base64.b64decode(results['audio_base64'])
vis = results['visimes']
with open(wav_file, 'wb') as f:
f.write(audio)
except (ReadTimeout, ConnectionError, ConnectTimeout, HTTPError):
raise RemoteTTSTimeoutException(
"Mimic 2 server request timed out. Falling back to mimic")