Fix mimic 2 long sentences (#2061)
* Fix mimic 2 long sentences: fixes a bug where the second and third chunking passes incorrectly concatenated strings with lists, resulting in chunks of single characters. * Handle mimic2 chunking correctly - Move preprocessing from get_tts() to a method called from the TTS execute(); this allows all parts to be spoken and the caching to work correctly - Remove duplicate of phonetic spelling in mimic2_tts pull/2063/head
parent
dfa714c56d
commit
9eeb8cefc3
|
@ -275,6 +275,20 @@ class TTS:
|
|||
# return text with supported ssml tags only
|
||||
return utterance.replace(" ", " ")
|
||||
|
||||
def _preprocess_sentence(self, sentence):
|
||||
""" Default preprocessing is no preprocessing.
|
||||
|
||||
This method can be overridden to create chunks suitable to the
|
||||
TTS engine in question.
|
||||
|
||||
Arguments:
|
||||
sentence (str): sentence to preprocess
|
||||
|
||||
Returns:
|
||||
list: list of sentence parts
|
||||
"""
|
||||
return [sentence]
|
||||
|
||||
def execute(self, sentence, ident=None):
|
||||
"""
|
||||
Convert sentence to speech, preprocessing out unsupported ssml
|
||||
|
@ -295,20 +309,23 @@ class TTS:
|
|||
sentence = sentence.replace(word,
|
||||
self.spellings[word.lower()])
|
||||
|
||||
key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest())
|
||||
wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
|
||||
key + '.' + self.audio_ext)
|
||||
chunks = self._preprocess_sentence(sentence)
|
||||
for sentence in chunks:
|
||||
key = str(hashlib.md5(
|
||||
sentence.encode('utf-8', 'ignore')).hexdigest())
|
||||
wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
|
||||
key + '.' + self.audio_ext)
|
||||
|
||||
if os.path.exists(wav_file):
|
||||
LOG.debug("TTS cache hit")
|
||||
phonemes = self.load_phonemes(key)
|
||||
else:
|
||||
wav_file, phonemes = self.get_tts(sentence, wav_file)
|
||||
if phonemes:
|
||||
self.save_phonemes(key, phonemes)
|
||||
if os.path.exists(wav_file):
|
||||
LOG.debug("TTS cache hit")
|
||||
phonemes = self.load_phonemes(key)
|
||||
else:
|
||||
wav_file, phonemes = self.get_tts(sentence, wav_file)
|
||||
if phonemes:
|
||||
self.save_phonemes(key, phonemes)
|
||||
|
||||
vis = self.viseme(phonemes)
|
||||
self.queue.put((self.audio_ext, wav_file, vis, ident))
|
||||
vis = self.viseme(phonemes)
|
||||
self.queue.put((self.audio_ext, wav_file, vis, ident))
|
||||
|
||||
def viseme(self, phonemes):
|
||||
"""
|
||||
|
|
|
@ -89,13 +89,17 @@ def _split_by_punctuation(chunks, puncs):
|
|||
e.g. hello, world => [hello, world]
|
||||
|
||||
Args:
|
||||
chunks (list): text (str) to split
|
||||
chunks (list or str): text (str) to split
|
||||
puncs (list): list of punctuations used to split text
|
||||
|
||||
Returns:
|
||||
list: list with split text
|
||||
"""
|
||||
out = chunks
|
||||
if isinstance(chunks, str):
|
||||
out = [chunks]
|
||||
else:
|
||||
out = chunks
|
||||
|
||||
for punc in puncs:
|
||||
splits = []
|
||||
for t in out:
|
||||
|
@ -138,7 +142,7 @@ def _sentence_chunker(text):
|
|||
|
||||
# first split by punctuations that are major pauses
|
||||
first_splits = _split_by_punctuation(
|
||||
[text],
|
||||
text,
|
||||
puncs=[r'\.', r'\!', r'\?', r'\:', r'\;']
|
||||
)
|
||||
|
||||
|
@ -149,7 +153,7 @@ def _sentence_chunker(text):
|
|||
second_splits += _split_by_punctuation(chunk,
|
||||
puncs=[r'\,', '--', '-'])
|
||||
else:
|
||||
second_splits += chunk
|
||||
second_splits.append(chunk)
|
||||
|
||||
# if chunks are still too big, chop into pieces of at most 20 words
|
||||
third_splits = []
|
||||
|
@ -157,7 +161,7 @@ def _sentence_chunker(text):
|
|||
if len(chunk) > _max_sentence_size:
|
||||
third_splits += _split_by_chunk_size(chunk, 20)
|
||||
else:
|
||||
third_splits += chunk
|
||||
third_splits.append(chunk)
|
||||
|
||||
return [_add_punctuation(chunk) for chunk in third_splits]
|
||||
|
||||
|
@ -194,7 +198,7 @@ class Mimic2(TTS):
|
|||
'%s Http Error: %s for url: %s' %
|
||||
(req.status_code, req.reason, req.url))
|
||||
|
||||
def _requests(self, chunks):
|
||||
def _requests(self, sentence):
|
||||
"""create asynchronous request list
|
||||
|
||||
Args:
|
||||
|
@ -203,13 +207,9 @@ class Mimic2(TTS):
|
|||
Returns:
|
||||
list: list of FutureSession objects
|
||||
"""
|
||||
reqs = []
|
||||
for chunk in chunks:
|
||||
if len(chunk) > 0:
|
||||
url = self.url + parse.quote(chunk)
|
||||
req_route = url + "&visimes=True"
|
||||
reqs.append(self.session.get(req_route, timeout=5))
|
||||
return reqs
|
||||
url = self.url + parse.quote(sentence)
|
||||
req_route = url + "&visimes=True"
|
||||
return self.session.get(req_route, timeout=5)
|
||||
|
||||
def viseme(self, phonemes):
|
||||
""" Maps phonemes to appropriate viseme encoding
|
||||
|
@ -234,6 +234,10 @@ class Mimic2(TTS):
|
|||
visemes.append((vis, vis_dur))
|
||||
return visemes
|
||||
|
||||
def _preprocess_sentence(self, sentence):
    """Split sentence in chunks better suited for mimic2.

    Overrides the TTS base-class ``_preprocess_sentence`` hook, which
    ``execute`` calls as ``self._preprocess_sentence(sentence)``. The
    method was previously spelled ``_prepocess_sentence`` and lacked
    ``self``, so it never replaced the base hook and mimic2 sentences
    were not chunked.

    Arguments:
        sentence (str): sentence to split

    Returns:
        list: list of sentence chunks, as produced by _sentence_chunker
    """
    return _sentence_chunker(sentence)
|
||||
|
||||
def get_tts(self, sentence, wav_file):
|
||||
""" Generate (remotely) and play mimic2 WAV audio
|
||||
|
||||
|
@ -241,23 +245,14 @@ class Mimic2(TTS):
|
|||
sentence (str): Phrase to synthesize to audio with mimic2
|
||||
wav_file (str): Location to write audio output
|
||||
"""
|
||||
|
||||
# Use the phonetic_spelling mechanism from the TTS base class
|
||||
if self.phonetic_spelling:
|
||||
for word in re.findall(r"[\w']+", sentence):
|
||||
if word.lower() in self.spellings:
|
||||
sentence = sentence.replace(word,
|
||||
self.spellings[word.lower()])
|
||||
|
||||
chunks = _sentence_chunker(sentence)
|
||||
LOG.debug("Generating Mimic2 TSS for: "+str(chunks))
|
||||
LOG.debug("Generating Mimic2 TSS for: " + str(sentence))
|
||||
try:
|
||||
for _, req in enumerate(self._requests(chunks)):
|
||||
results = req.result().json()
|
||||
audio = base64.b64decode(results['audio_base64'])
|
||||
vis = results['visimes']
|
||||
with open(wav_file, 'wb') as f:
|
||||
f.write(audio)
|
||||
req = self._requests(sentence)
|
||||
results = req.result().json()
|
||||
audio = base64.b64decode(results['audio_base64'])
|
||||
vis = results['visimes']
|
||||
with open(wav_file, 'wb') as f:
|
||||
f.write(audio)
|
||||
except (ReadTimeout, ConnectionError, ConnectTimeout, HTTPError):
|
||||
raise RemoteTTSTimeoutException(
|
||||
"Mimic 2 server request timed out. Falling back to mimic")
|
||||
|
|
Loading…
Reference in New Issue