Fix mimic 2 long sentences (#2061)
* Fix mimic 2 long sentences: Fixes a bug where the second and third chunking passes incorrectly concatenated strings with lists, resulting in chunks of single characters. * Handle mimic2 chunking correctly - Move preprocessing from get_tts() to a method called from TTS execute(); this allows all parts to be spoken and the caching to work correctly - Remove duplicate of phonetic spelling in mimic2_tts pull/2063/head
parent
dfa714c56d
commit
9eeb8cefc3
|
@ -275,6 +275,20 @@ class TTS:
|
||||||
# return text with supported ssml tags only
|
# return text with supported ssml tags only
|
||||||
return utterance.replace(" ", " ")
|
return utterance.replace(" ", " ")
|
||||||
|
|
||||||
|
def _preprocess_sentence(self, sentence):
|
||||||
|
""" Default preprocessing is no preprocessing.
|
||||||
|
|
||||||
|
This method can be overridden to create chunks suitable to the
|
||||||
|
TTS engine in question.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
sentence (str): sentence to preprocess
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list: list of sentence parts
|
||||||
|
"""
|
||||||
|
return [sentence]
|
||||||
|
|
||||||
def execute(self, sentence, ident=None):
|
def execute(self, sentence, ident=None):
|
||||||
"""
|
"""
|
||||||
Convert sentence to speech, preprocessing out unsupported ssml
|
Convert sentence to speech, preprocessing out unsupported ssml
|
||||||
|
@ -295,20 +309,23 @@ class TTS:
|
||||||
sentence = sentence.replace(word,
|
sentence = sentence.replace(word,
|
||||||
self.spellings[word.lower()])
|
self.spellings[word.lower()])
|
||||||
|
|
||||||
key = str(hashlib.md5(sentence.encode('utf-8', 'ignore')).hexdigest())
|
chunks = self._preprocess_sentence(sentence)
|
||||||
wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
|
for sentence in chunks:
|
||||||
key + '.' + self.audio_ext)
|
key = str(hashlib.md5(
|
||||||
|
sentence.encode('utf-8', 'ignore')).hexdigest())
|
||||||
|
wav_file = os.path.join(mycroft.util.get_cache_directory("tts"),
|
||||||
|
key + '.' + self.audio_ext)
|
||||||
|
|
||||||
if os.path.exists(wav_file):
|
if os.path.exists(wav_file):
|
||||||
LOG.debug("TTS cache hit")
|
LOG.debug("TTS cache hit")
|
||||||
phonemes = self.load_phonemes(key)
|
phonemes = self.load_phonemes(key)
|
||||||
else:
|
else:
|
||||||
wav_file, phonemes = self.get_tts(sentence, wav_file)
|
wav_file, phonemes = self.get_tts(sentence, wav_file)
|
||||||
if phonemes:
|
if phonemes:
|
||||||
self.save_phonemes(key, phonemes)
|
self.save_phonemes(key, phonemes)
|
||||||
|
|
||||||
vis = self.viseme(phonemes)
|
vis = self.viseme(phonemes)
|
||||||
self.queue.put((self.audio_ext, wav_file, vis, ident))
|
self.queue.put((self.audio_ext, wav_file, vis, ident))
|
||||||
|
|
||||||
def viseme(self, phonemes):
|
def viseme(self, phonemes):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -89,13 +89,17 @@ def _split_by_punctuation(chunks, puncs):
|
||||||
e.g. hello, world => [hello, world]
|
e.g. hello, world => [hello, world]
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
chunks (list): text (str) to split
|
chunks (list or str): text (str) to split
|
||||||
puncs (list): list of punctuations used to split text
|
puncs (list): list of punctuations used to split text
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
list: list with split text
|
list: list with split text
|
||||||
"""
|
"""
|
||||||
out = chunks
|
if isinstance(chunks, str):
|
||||||
|
out = [chunks]
|
||||||
|
else:
|
||||||
|
out = chunks
|
||||||
|
|
||||||
for punc in puncs:
|
for punc in puncs:
|
||||||
splits = []
|
splits = []
|
||||||
for t in out:
|
for t in out:
|
||||||
|
@ -138,7 +142,7 @@ def _sentence_chunker(text):
|
||||||
|
|
||||||
# first split by punctuations that are major pauses
|
# first split by punctuations that are major pauses
|
||||||
first_splits = _split_by_punctuation(
|
first_splits = _split_by_punctuation(
|
||||||
[text],
|
text,
|
||||||
puncs=[r'\.', r'\!', r'\?', r'\:', r'\;']
|
puncs=[r'\.', r'\!', r'\?', r'\:', r'\;']
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -149,7 +153,7 @@ def _sentence_chunker(text):
|
||||||
second_splits += _split_by_punctuation(chunk,
|
second_splits += _split_by_punctuation(chunk,
|
||||||
puncs=[r'\,', '--', '-'])
|
puncs=[r'\,', '--', '-'])
|
||||||
else:
|
else:
|
||||||
second_splits += chunk
|
second_splits.append(chunk)
|
||||||
|
|
||||||
# if chunks are still too big, chop into pieces of at most 20 words
|
# if chunks are still too big, chop into pieces of at most 20 words
|
||||||
third_splits = []
|
third_splits = []
|
||||||
|
@ -157,7 +161,7 @@ def _sentence_chunker(text):
|
||||||
if len(chunk) > _max_sentence_size:
|
if len(chunk) > _max_sentence_size:
|
||||||
third_splits += _split_by_chunk_size(chunk, 20)
|
third_splits += _split_by_chunk_size(chunk, 20)
|
||||||
else:
|
else:
|
||||||
third_splits += chunk
|
third_splits.append(chunk)
|
||||||
|
|
||||||
return [_add_punctuation(chunk) for chunk in third_splits]
|
return [_add_punctuation(chunk) for chunk in third_splits]
|
||||||
|
|
||||||
|
@ -194,7 +198,7 @@ class Mimic2(TTS):
|
||||||
'%s Http Error: %s for url: %s' %
|
'%s Http Error: %s for url: %s' %
|
||||||
(req.status_code, req.reason, req.url))
|
(req.status_code, req.reason, req.url))
|
||||||
|
|
||||||
def _requests(self, chunks):
|
def _requests(self, sentence):
|
||||||
"""create asynchronous request list
|
"""create asynchronous request list
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -203,13 +207,9 @@ class Mimic2(TTS):
|
||||||
Returns:
|
Returns:
|
||||||
list: list of FutureSession objects
|
list: list of FutureSession objects
|
||||||
"""
|
"""
|
||||||
reqs = []
|
url = self.url + parse.quote(sentence)
|
||||||
for chunk in chunks:
|
req_route = url + "&visimes=True"
|
||||||
if len(chunk) > 0:
|
return self.session.get(req_route, timeout=5)
|
||||||
url = self.url + parse.quote(chunk)
|
|
||||||
req_route = url + "&visimes=True"
|
|
||||||
reqs.append(self.session.get(req_route, timeout=5))
|
|
||||||
return reqs
|
|
||||||
|
|
||||||
def viseme(self, phonemes):
|
def viseme(self, phonemes):
|
||||||
""" Maps phonemes to appropriate viseme encoding
|
""" Maps phonemes to appropriate viseme encoding
|
||||||
|
@ -234,6 +234,10 @@ class Mimic2(TTS):
|
||||||
visemes.append((vis, vis_dur))
|
visemes.append((vis, vis_dur))
|
||||||
return visemes
|
return visemes
|
||||||
|
|
||||||
|
def _prepocess_sentence(sentence):
|
||||||
|
""" Split sentence in chunks better suited for mimic2. """
|
||||||
|
return _sentence_chunker(sentence)
|
||||||
|
|
||||||
def get_tts(self, sentence, wav_file):
|
def get_tts(self, sentence, wav_file):
|
||||||
""" Generate (remotely) and play mimic2 WAV audio
|
""" Generate (remotely) and play mimic2 WAV audio
|
||||||
|
|
||||||
|
@ -241,23 +245,14 @@ class Mimic2(TTS):
|
||||||
sentence (str): Phrase to synthesize to audio with mimic2
|
sentence (str): Phrase to synthesize to audio with mimic2
|
||||||
wav_file (str): Location to write audio output
|
wav_file (str): Location to write audio output
|
||||||
"""
|
"""
|
||||||
|
LOG.debug("Generating Mimic2 TSS for: " + str(sentence))
|
||||||
# Use the phonetic_spelling mechanism from the TTS base class
|
|
||||||
if self.phonetic_spelling:
|
|
||||||
for word in re.findall(r"[\w']+", sentence):
|
|
||||||
if word.lower() in self.spellings:
|
|
||||||
sentence = sentence.replace(word,
|
|
||||||
self.spellings[word.lower()])
|
|
||||||
|
|
||||||
chunks = _sentence_chunker(sentence)
|
|
||||||
LOG.debug("Generating Mimic2 TSS for: "+str(chunks))
|
|
||||||
try:
|
try:
|
||||||
for _, req in enumerate(self._requests(chunks)):
|
req = self._requests(sentence)
|
||||||
results = req.result().json()
|
results = req.result().json()
|
||||||
audio = base64.b64decode(results['audio_base64'])
|
audio = base64.b64decode(results['audio_base64'])
|
||||||
vis = results['visimes']
|
vis = results['visimes']
|
||||||
with open(wav_file, 'wb') as f:
|
with open(wav_file, 'wb') as f:
|
||||||
f.write(audio)
|
f.write(audio)
|
||||||
except (ReadTimeout, ConnectionError, ConnectTimeout, HTTPError):
|
except (ReadTimeout, ConnectionError, ConnectTimeout, HTTPError):
|
||||||
raise RemoteTTSTimeoutException(
|
raise RemoteTTSTimeoutException(
|
||||||
"Mimic 2 server request timed out. Falling back to mimic")
|
"Mimic 2 server request timed out. Falling back to mimic")
|
||||||
|
|
Loading…
Reference in New Issue