phoneme punctuation bug fix

2019-02-16 03:20:04 +01:00 · 2019-02-16 03:20:04 +01:00 · 97a16cedbf
parent 4d2c374cfd
commit 97a16cedbf
2 changed files with 4 additions and 4 deletions
--- a/utils/text/__init__.py
+++ b/utils/text/__init__.py
@@ -4,7 +4,7 @@ import re
 import phonemizer
 from phonemizer.phonemize import phonemize
 from utils.text import cleaners
-from utils.text.symbols import symbols, phonemes, _punctuations
+from utils.text.symbols import symbols, phonemes, _phoneme_punctuations

 # Mappings from symbol to numeric ID and vice versa:
 _symbol_to_id = {s: i for i, s in enumerate(symbols)}
@@ -17,7 +17,7 @@ _id_to_phonemes = {i: s for i, s in enumerate(phonemes)}
 _curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')

 # Regular expression matching punctuations, ignoring empty space
-pat = r'['+_punctuations[:-1]+']+'
+pat = r'['+_phoneme_punctuations[:-1]+']+'


 def text2phone(text, language):
@@ -31,7 +31,7 @@ def text2phone(text, language):
    # Replace \n with matching punctuations.
    if len(punctuations) > 0:
        for punct in punctuations[:-1]:
-             ph = ph.replace(' \n', punct+'| ', 1)
+             ph = ph.replace('| |\n', '|'+punct+'| |', 1)
        try:
             ph = ph[:-1] + punctuations[-1]
        except:
@@ -63,7 +63,6 @@ def sequence_to_phoneme(sequence):
    for symbol_id in sequence:
        if symbol_id in _id_to_phonemes:
            s = _id_to_phonemes[symbol_id]
-            print(s)
            result += s
    return result.replace('}{', ' ')

--- a/utils/text/symbols.py
+++ b/utils/text/symbols.py
@@ -11,6 +11,7 @@ _pad = '_'
 _eos = '~'
 _characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? '
 _punctuations = '!\'(),-.:;? '
+_phoneme_punctuations = '.!;:,?'

 # TODO: include more phoneme characters for other languages.
 _phonemes = ['l','ɹ','ɜ','ɚ','k','u','ʔ','ð','ɐ','ɾ','ɑ','ɔ','b','ɛ','t','v','n','m','ʊ','ŋ','s',