From ff9b63d02a77e6ea5dca3529f6d223e41ea731fb Mon Sep 17 00:00:00 2001 From: logan hart Date: Wed, 16 Nov 2022 10:12:39 -0500 Subject: [PATCH] Add neon models (#2140) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add neon ljspeech vits model * Add neon german model * Update .models.json * Add neon spanish model * Add french model * Add Dutch model * Add Hungarian model * Add Greek model * Remove uneeded description * Update .models.json * Update .models.json * Handling neon models * Add all neon models * Update .models.json * Split zoo_tests * Update test names * Update model testing Co-authored-by: Eren Gölge --- .github/workflows/zoo_tests0.yml | 52 ++++ .../{zoo_tests.yml => zoo_tests1.yml} | 4 +- .github/workflows/zoo_tests2.yml | 50 ++++ TTS/.models.json | 255 +++++++++++++++++- TTS/utils/synthesizer.py | 13 +- tests/zoo_tests/test_models.py | 25 +- 6 files changed, 385 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/zoo_tests0.yml rename .github/workflows/{zoo_tests.yml => zoo_tests1.yml} (91%) create mode 100644 .github/workflows/zoo_tests2.yml diff --git a/.github/workflows/zoo_tests0.yml b/.github/workflows/zoo_tests0.yml new file mode 100644 index 00000000..01e1c400 --- /dev/null +++ b/.github/workflows/zoo_tests0.yml @@ -0,0 +1,52 @@ +name: zoo-tests-0 + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + check_skip: + runs-on: ubuntu-latest + if: "! contains(github.event.head_commit.message, '[ci skip]')" + steps: + - run: echo "${{ github.event.head_commit.message }}" + + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.7, 3.8, 3.9, "3.10"] + experimental: [false] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: 'requirements*' + - name: check OS + run: cat /etc/os-release + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y git make gcc + sudo apt-get install espeak espeak-ng + make system-deps + - name: Install/upgrade Python setup deps + run: python3 -m pip install --upgrade pip setuptools wheel + - name: Replace scarf urls + run: | + sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json + - name: Install TTS + run: | + python3 -m pip install .[all] + python3 setup.py egg_info + - name: Unit tests + run: | + nose2 -F -v -B TTS tests.zoo_tests.test_models.test_models_offset_0_step_3 + nose2 -F -v -B TTS tests.zoo_tests.test_models.test_voice_conversion diff --git a/.github/workflows/zoo_tests.yml b/.github/workflows/zoo_tests1.yml similarity index 91% rename from .github/workflows/zoo_tests.yml rename to .github/workflows/zoo_tests1.yml index bd9c695e..1650aa12 100644 --- a/.github/workflows/zoo_tests.yml +++ b/.github/workflows/zoo_tests1.yml @@ -1,4 +1,4 @@ -name: zoo-tests +name: zoo-tests-1 on: push: @@ -47,4 +47,4 @@ jobs: python3 -m pip install .[all] python3 setup.py egg_info - name: Unit tests - run: make test_zoo + run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_1_step_3 diff --git a/.github/workflows/zoo_tests2.yml b/.github/workflows/zoo_tests2.yml new file mode 100644 index 00000000..f5a155ab --- /dev/null +++ b/.github/workflows/zoo_tests2.yml @@ -0,0 +1,50 @@ +name: zoo-tests-2 + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + check_skip: + runs-on: ubuntu-latest + if: "! contains(github.event.head_commit.message, '[ci skip]')" + steps: + - run: echo "${{ github.event.head_commit.message }}" + + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.7, 3.8, 3.9, "3.10"] + experimental: [false] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: 'requirements*' + - name: check OS + run: cat /etc/os-release + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y git make gcc + sudo apt-get install espeak espeak-ng + make system-deps + - name: Install/upgrade Python setup deps + run: python3 -m pip install --upgrade pip setuptools wheel + - name: Replace scarf urls + run: | + sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json + - name: Install TTS + run: | + python3 -m pip install .[all] + python3 setup.py egg_info + - name: Unit tests + run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_2_step_3 diff --git a/TTS/.models.json b/TTS/.models.json index 84a7cd7d..4e2d5e47 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -12,6 +12,61 @@ } } }, + "bg": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "cs": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "da": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "et": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "ga": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, "en": { "ek1": { "tacotron2": { @@ -79,6 +134,14 @@ "license": "apache 2.0", "contact": "egolge@coqui.com" }, + "vits--neon": { + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--en--ljspeech--vits.zip", + "default_vocoder": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause", + "contact": null, + "commit": null + }, "fast_pitch": { "description": "FastPitch model trained on LJSpeech using the Aligner Network", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip", @@ -151,18 +214,36 @@ "license": "MPL", "contact": "egolge@coqui.com" } - } + }, + "css10":{ + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } }, "fr": { "mai": { "tacotron2-DDC": { "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", - "commit": "", + "commit": null, "author": "Eren Gölge @erogol", "license": "MPL", "contact": "egolge@coqui.com" } + }, + "css10":{ + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } } }, "uk":{ @@ -174,6 +255,13 @@ "license": "MIT", "contact": "", "default_vocoder": "vocoder_models/uk/mai/multiband-melgan" + }, + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" } } }, @@ -198,6 +286,15 @@ "stats_file": null, "commit": "540d811" } + }, + "css10":{ + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } } }, "de": { @@ -224,6 +321,15 @@ "license": "apache 2.0", "commit": "unknown" } + }, + "css10": { + "vits-neon":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip", + "default_vocoder": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause", + "commit": null + } } }, "ja": { @@ -359,6 +465,149 @@ "commit": "1b22f03" } } + }, + "hu": { + "css10": { + "vits": { + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hu--css10--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "el": { + "cv": { + "vits": { + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--el--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "fi": { + "css10": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "hr": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "lt": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "lv": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "mt": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "pl": { + "mai_female": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "pt": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "ro": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "sk": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "sl": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } + }, + "sv": { + "cv": { + "vits":{ + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip", + "default_vocoder": null, + "commit": null, + "author": "@NeonGeckoCom", + "license": "bsd-3-clause" + } + } } }, "vocoder_models": { @@ -512,4 +761,4 @@ } } } -} \ No newline at end of file +} diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 978d8494..bc3fc0aa 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -213,7 +213,12 @@ class Synthesizer(object): speaker_embedding = None speaker_id = None if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "name_to_id"): - if speaker_name and isinstance(speaker_name, str): + + # handle Neon models with single speaker. + if len(self.tts_model.speaker_manager.name_to_id) == 1: + speaker_id = list(self.tts_model.speaker_manager.name_to_id.values())[0] + + elif speaker_name and isinstance(speaker_name, str): if self.tts_config.use_d_vector_file: # get the average speaker embedding from the saved d_vectors. speaker_embedding = self.tts_model.speaker_manager.get_mean_embedding( @@ -243,7 +248,11 @@ class Synthesizer(object): if self.tts_languages_file or ( hasattr(self.tts_model, "language_manager") and self.tts_model.language_manager is not None ): - if language_name and isinstance(language_name, str): + + if len(self.tts_model.language_manager.name_to_id) == 1: + language_id = list(self.tts_model.language_manager.name_to_id.values())[0] + + elif language_name and isinstance(language_name, str): language_id = self.tts_model.language_manager.name_to_id[language_name] elif not language_name: diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py index 7105edf4..a1f9c536 100644 --- a/tests/zoo_tests/test_models.py +++ b/tests/zoo_tests/test_models.py @@ -10,14 +10,13 @@ from TTS.utils.generic_utils import get_user_data_dir from TTS.utils.manage import ModelManager -def test_run_all_models(): +def run_models(offset=0, step=1): """Check if all the models are downloadable and tts models run correctly.""" print(" > Run synthesizer with all the models.") - download_dir = get_user_data_dir("tts") output_path = os.path.join(get_tests_output_path(), "output.wav") manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False) model_names = manager.list_models() - for model_name in model_names: + for model_name in model_names[offset::step]: print(f"\n > Run - {model_name}") model_path, _, _ = manager.download_model(model_name) if "tts_models" in model_name: @@ -50,15 +49,27 @@ def test_run_all_models(): f'--text "This is an example." --out_path "{output_path}" --progress_bar False' ) # remove downloaded models - shutil.rmtree(download_dir) + shutil.rmtree(local_download_dir) + shutil.rmtree(get_user_data_dir("tts")) else: # only download the model manager.download_model(model_name) print(f" | > OK: {model_name}") - folders = glob.glob(os.path.join(manager.output_prefix, "*")) - assert len(folders) == len(model_names) - shutil.rmtree(manager.output_prefix) + # folders = glob.glob(os.path.join(manager.output_prefix, "*")) + # assert len(folders) == len(model_names) // step + + +def test_models_offset_0_step_3(): + run_models(offset=0, step=3) + + +def test_models_offset_1_step_3(): + run_models(offset=1, step=3) + + +def test_models_offset_2_step_3(): + run_models(offset=2, step=3) def test_voice_conversion():