From 99d9bb7a174fce17f13c9f988e75d76703023066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Fri, 19 Nov 2021 10:29:24 +0100 Subject: [PATCH] Test Phonemizers --- tests/text_tests/test_characters.py | 49 +++++----- tests/text_tests/test_phonemizer.py | 144 ++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 25 deletions(-) create mode 100644 tests/text_tests/test_phonemizer.py diff --git a/tests/text_tests/test_characters.py b/tests/text_tests/test_characters.py index 5a051ac4..ed84b5b4 100644 --- a/tests/text_tests/test_characters.py +++ b/tests/text_tests/test_characters.py @@ -1,12 +1,6 @@ import unittest -from TTS.tts.utils.text.characters import ( - BaseCharacters, - IPAPhonemes, - Graphemes, - create_graphemes, - create_phonemes, -) +from TTS.tts.utils.text.characters import BaseCharacters, Graphemes, IPAPhonemes, create_graphemes, create_phonemes def test_make_symbols(): @@ -16,16 +10,7 @@ def test_make_symbols(): class BaseCharacterTest(unittest.TestCase): def setUp(self): - self.characters_empty = BaseCharacters( - "", - "", - pad="", - eos="", - bos="", - blank="", - is_unique=True, - is_sorted=True - ) + self.characters_empty = BaseCharacters("", "", pad="", eos="", bos="", blank="", is_unique=True, is_sorted=True) def test_default_character_sets(self): """Test initiation of default character sets""" @@ -41,8 +26,10 @@ class BaseCharacterTest(unittest.TestCase): self.characters_empty.bos = "[BOS]" self.characters_empty.blank = "[BLANK]" - self.assertEqual(self.characters_empty.num_chars, len(["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "])) - + self.assertEqual( + self.characters_empty.num_chars, + len(["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "]), + ) def test_unique_sorted(self): """Test if the unique and sorted option works""" @@ -53,7 +40,10 @@ class BaseCharacterTest(unittest.TestCase): self.characters_empty.bos = "[BOS]" self.characters_empty.blank = "[BLANK]" - self.assertEqual(self.characters_empty.num_chars, len(["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "])) + self.assertEqual( + self.characters_empty.num_chars, + len(["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "]), + ) def test_setters_getters(self): """Test the class setters behaves as expected""" @@ -71,16 +61,26 @@ class BaseCharacterTest(unittest.TestCase): self.characters_empty.eos = "[EOS]" self.assertEqual(self.characters_empty._eos, "[EOS]") - self.assertEqual(self.characters_empty.vocab, ["[PAD]", "[EOS]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "]) + self.assertEqual( + self.characters_empty.vocab, ["[PAD]", "[EOS]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "] + ) self.characters_empty.bos = "[BOS]" self.assertEqual(self.characters_empty._bos, "[BOS]") - self.assertEqual(self.characters_empty.vocab, ["[PAD]", "[EOS]", "[BOS]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "]) + self.assertEqual( + self.characters_empty.vocab, ["[PAD]", "[EOS]", "[BOS]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "] + ) self.characters_empty.blank = "[BLANK]" self.assertEqual(self.characters_empty._blank, "[BLANK]") - self.assertEqual(self.characters_empty.vocab, ["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "]) - self.assertEqual(self.characters_empty.num_chars, len(["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "])) + self.assertEqual( + self.characters_empty.vocab, + ["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "], + ) + self.assertEqual( + self.characters_empty.num_chars, + len(["[PAD]", "[EOS]", "[BOS]", "[BLANK]", "a", "b", "c", ".", ",", ";", ":", "!", "?", " "]), + ) self.characters_empty.print_log() @@ -124,4 +124,3 @@ class BaseCharacterTest(unittest.TestCase): self.assertEqual(self.characters_empty.id_to_char(11), "!") self.assertEqual(self.characters_empty.id_to_char(12), "?") self.assertEqual(self.characters_empty.id_to_char(13), " ") - diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py new file mode 100644 index 00000000..cd0adfe1 --- /dev/null +++ b/tests/text_tests/test_phonemizer.py @@ -0,0 +1,144 @@ +import unittest + +from TTS.tts.utils.text.characters import BaseCharacters, Graphemes, IPAPhonemes, create_graphemes, create_phonemes +from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer +from TTS.tts.utils.text.tokenizer import TTSTokenizer + +EXAMPLE_TEXT = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" + + +class TestEspeakPhonemizer(unittest.TestCase): + def setUp(self): + self.phonemizer = ESpeak(language="en-us") + self.EXPECTED_PHONEMES = "ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ|ɹ ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!" + + def test_phonemize(self): + output = self.phonemizer.phonemize(EXAMPLE_TEXT, separator="|") + self.assertEqual(output, self.EXPECTED_PHONEMES) + + # multiple punctuations + text = "Be a voice, not an! echo?" + gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?" + output = self.phonemizer.phonemize(text, separator="|") + output = output.replace("|", "") + self.assertEqual(output, gt) + + # not ending with punctuation + text = "Be a voice, not an! echo" + gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ" + output = self.phonemizer.phonemize(text, separator="") + self.assertEqual(output, gt) + + # extra space after the sentence + text = "Be a voice, not an! echo. " + gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ." + output = self.phonemizer.phonemize(text, separator="") + self.assertEqual(output, gt) + + def test_name(self): + self.assertEqual(self.phonemizer.name(), "espeak") + + def test_get_supported_languages(self): + self.assertIsInstance(self.phonemizer.supported_languages(), dict) + + def test_get_version(self): + self.assertIsInstance(self.phonemizer.version(), str) + + def test_is_available(self): + self.assertTrue(self.phonemizer.is_available()) + + +class TestGruutPhonemizer(unittest.TestCase): + def setUp(self): + self.phonemizer = Gruut(language="en-us", use_espeak_phonemes=True, keep_stress=False) + self.EXPECTED_PHONEMES = "ɹ|i|ː|s|ə|n|t| ɹ|ᵻ|s|ɜ|ː|t|ʃ| æ|ɾ| h|ɑ|ː|ɹ|v|ɚ|d| h|ɐ|z| ʃ|o|ʊ|n| m|ɛ|d|ᵻ|t|e|ɪ|ɾ|ɪ|ŋ| f|ɔ|ː|ɹ| æ|z| l|ɪ|ɾ|ə|l| æ|z| e|ɪ|t| w|i|ː|k|s| k|æ|ŋ| æ|k|t|ʃ|u|ː|ə|l|i| ɪ|ŋ|k|ɹ|i|ː|s, ð|ə| ɡ|ɹ|e|ɪ| m|æ|ɾ|ɚ| ɪ|n| ð|ə| p|ɑ|ː|ɹ|t|s| ʌ|v| ð|ə| b|ɹ|e|ɪ|n| ɹ|ᵻ|s|p|ɑ|ː|n|s|ᵻ|b|ə|l| f|ɔ|ː|ɹ| ɪ|m|o|ʊ|ʃ|ə|n|ə|l| ɹ|ɛ|ɡ|j|ʊ|l|e|ɪ|ʃ|ə|n| æ|n|d| l|ɜ|ː|n|ɪ|ŋ!" + + def test_phonemize(self): + output = self.phonemizer.phonemize(EXAMPLE_TEXT, separator="|") + self.assertEqual(output, self.EXPECTED_PHONEMES) + + # multiple punctuations + text = "Be a voice, not an! echo?" + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ?" + output = self.phonemizer.phonemize(text, separator="|") + output = output.replace("|", "") + self.assertEqual(output, gt) + + # not ending with punctuation + text = "Be a voice, not an! echo" + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ" + output = self.phonemizer.phonemize(text, separator="") + self.assertEqual(output, gt) + + # extra space after the sentence + text = "Be a voice, not an! echo. " + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ." + output = self.phonemizer.phonemize(text, separator="") + self.assertEqual(output, gt) + + def test_name(self): + self.assertEqual(self.phonemizer.name(), "gruut") + + def test_get_supported_languages(self): + self.assertIsInstance(self.phonemizer.supported_languages(), list) + + def test_get_version(self): + self.assertIsInstance(self.phonemizer.version(), str) + + def test_is_available(self): + self.assertTrue(self.phonemizer.is_available()) + + +class TestJA_JPPhonemizer(unittest.TestCase): + def setUp(self): + self.phonemizer = JA_JP_Phonemizer() + self._TEST_CASES = """ + どちらに行きますか?/dochiraniikimasuka? + 今日は温泉に、行きます。/kyo:waoNseNni,ikimasu. + 「A」から「Z」までです。/e:karazeqtomadedesu. + そうですね!/so:desune! + クジラは哺乳類です。/kujirawahonyu:ruidesu. + ヴィディオを見ます。/bidioomimasu. + 今日は8月22日です/kyo:wahachigatsuniju:ninichidesu + xyzとαβγ/eqkusuwaizeqtotoarufabe:tagaNma + 値段は$12.34です/nedaNwaju:niteNsaNyoNdorudesu + """ + + def test_phonemize(self): + for line in self._TEST_CASES.strip().split("\n"): + text, phone = line.split("/") + self.assertEqual(self.phonemizer.phonemize(text, separator=""), phone) + + def test_name(self): + self.assertEqual(self.phonemizer.name(), "ja_jp_phonemizer") + + def test_get_supported_languages(self): + self.assertIsInstance(self.phonemizer.supported_languages(), dict) + + def test_get_version(self): + self.assertIsInstance(self.phonemizer.version(), str) + + def test_is_available(self): + self.assertTrue(self.phonemizer.is_available()) + + +class TestZH_CN_Phonemizer(unittest.TestCase): + def setUp(self): + self.phonemizer = ZH_CN_Phonemizer() + self._TEST_CASES = "" + + def test_phonemize(self): + # TODO: implement ZH phonemizer tests + pass + + def test_name(self): + self.assertEqual(self.phonemizer.name(), "zh_cn_phonemizer") + + def test_get_supported_languages(self): + self.assertIsInstance(self.phonemizer.supported_languages(), dict) + + def test_get_version(self): + self.assertIsInstance(self.phonemizer.version(), str) + + def test_is_available(self): + self.assertTrue(self.phonemizer.is_available()) \ No newline at end of file