mirror of https://github.com/coqui-ai/TTS.git
update for phonemizer 2.1
parent
78464f1ead
commit
4130674e46
|
@ -69,7 +69,7 @@ def test_phoneme_to_sequence():
|
||||||
|
|
||||||
def test_text2phone():
|
def test_text2phone():
|
||||||
text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"
|
text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"
|
||||||
gt = "ɹ|iː|s|ə|n|t| |ɹ|ɪ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i|| |ɪ|n|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ɪ|s|p|ɑː|n|s|ə|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|uː|l|eɪ|ʃ|ə|n||| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!"
|
gt = "ɹ|iː|s|ə|n|t| |ɹ|ɪ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i| |ɪ|n|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ɪ|s|p|ɑː|n|s|ə|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|uː|l|eɪ|ʃ|ə|n| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!"
|
||||||
lang = "en-us"
|
lang = "en-us"
|
||||||
phonemes = text2phone(text, lang)
|
phonemes = text2phone(text, lang)
|
||||||
assert gt == phonemes
|
assert gt == phonemes, f"\n{phonemes} \n vs \n{gt}"
|
||||||
|
|
|
@ -28,8 +28,10 @@ def text2phone(text, language):
|
||||||
seperator = phonemizer.separator.Separator(' |', '', '|')
|
seperator = phonemizer.separator.Separator(' |', '', '|')
|
||||||
#try:
|
#try:
|
||||||
punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
|
punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
|
||||||
|
if float(phonemizer.__version__) < 2.1:
|
||||||
ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language)
|
ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language)
|
||||||
ph = ph[:-1].strip() # skip the last empty character
|
ph = ph[:-1].strip() # skip the last empty character
|
||||||
|
# phonemizer does not handle punctuation, so we restore it here.
|
||||||
# Replace \n with matching punctuations.
|
# Replace \n with matching punctuations.
|
||||||
if punctuations:
|
if punctuations:
|
||||||
# if text ends with a punctuation.
|
# if text ends with a punctuation.
|
||||||
|
@ -43,6 +45,17 @@ def text2phone(text, language):
|
||||||
else:
|
else:
|
||||||
for punct in punctuations:
|
for punct in punctuations:
|
||||||
ph = ph.replace('| |\n', '|'+punct+'| |', 1)
|
ph = ph.replace('| |\n', '|'+punct+'| |', 1)
|
||||||
|
elif float(phonemizer.__version__) == 2.1:
|
||||||
|
ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language, preserve_punctuation=True)
|
||||||
|
# This is a simple workaround for a known phonemizer issue; see:
|
||||||
|
# https://github.com/bootphon/phonemizer/issues/32
|
||||||
|
if punctuations:
|
||||||
|
for punctuation in punctuations:
|
||||||
|
ph = ph.replace(f"| |{punctuation} ", f"|{punctuation}| |").replace(f"| |{punctuation}", f"|{punctuation}| |")
|
||||||
|
ph = ph[:-3]
|
||||||
|
else:
|
||||||
|
raise RuntimeError(" [!] Use 'phonemizer' version 2.1 or older.")
|
||||||
|
|
||||||
return ph
|
return ph
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue