mirror of https://github.com/coqui-ai/TTS.git
Add neon models (#2140)
* Add neon ljspeech vits model * Add neon german model * Update .models.json * Add neon spanish model * Add french model * Add Dutch model * Add Hungarian model * Add Greek model * Remove unneeded description * Update .models.json * Update .models.json * Handling neon models * Add all neon models * Update .models.json * Split zoo_tests * Update test names * Update model testing Co-authored-by: Eren Gölge <erogol@hotmail.com> pull/1942/head
parent
a0f31df481
commit
ff9b63d02a
|
@ -0,0 +1,52 @@
|
|||
name: zoo-tests-0
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
jobs:
|
||||
check_skip:
|
||||
runs-on: ubuntu-latest
|
||||
if: "! contains(github.event.head_commit.message, '[ci skip]')"
|
||||
steps:
|
||||
- run: echo "${{ github.event.head_commit.message }}"
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
architecture: x64
|
||||
cache: 'pip'
|
||||
cache-dependency-path: 'requirements*'
|
||||
- name: check OS
|
||||
run: cat /etc/os-release
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y git make gcc
|
||||
sudo apt-get install espeak espeak-ng
|
||||
make system-deps
|
||||
- name: Install/upgrade Python setup deps
|
||||
run: python3 -m pip install --upgrade pip setuptools wheel
|
||||
- name: Replace scarf urls
|
||||
run: |
|
||||
sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
|
||||
- name: Install TTS
|
||||
run: |
|
||||
python3 -m pip install .[all]
|
||||
python3 setup.py egg_info
|
||||
- name: Unit tests
|
||||
run: |
|
||||
nose2 -F -v -B TTS tests.zoo_tests.test_models.test_models_offset_0_step_3
|
||||
nose2 -F -v -B TTS tests.zoo_tests.test_models.test_voice_conversion
|
|
@ -1,4 +1,4 @@
|
|||
name: zoo-tests
|
||||
name: zoo-tests-1
|
||||
|
||||
on:
|
||||
push:
|
||||
|
@ -47,4 +47,4 @@ jobs:
|
|||
python3 -m pip install .[all]
|
||||
python3 setup.py egg_info
|
||||
- name: Unit tests
|
||||
run: make test_zoo
|
||||
run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_1_step_3
|
|
@ -0,0 +1,50 @@
|
|||
name: zoo-tests-2
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
jobs:
|
||||
check_skip:
|
||||
runs-on: ubuntu-latest
|
||||
if: "! contains(github.event.head_commit.message, '[ci skip]')"
|
||||
steps:
|
||||
- run: echo "${{ github.event.head_commit.message }}"
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
architecture: x64
|
||||
cache: 'pip'
|
||||
cache-dependency-path: 'requirements*'
|
||||
- name: check OS
|
||||
run: cat /etc/os-release
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y git make gcc
|
||||
sudo apt-get install espeak espeak-ng
|
||||
make system-deps
|
||||
- name: Install/upgrade Python setup deps
|
||||
run: python3 -m pip install --upgrade pip setuptools wheel
|
||||
- name: Replace scarf urls
|
||||
run: |
|
||||
sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
|
||||
- name: Install TTS
|
||||
run: |
|
||||
python3 -m pip install .[all]
|
||||
python3 setup.py egg_info
|
||||
- name: Unit tests
|
||||
run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_2_step_3
|
255
TTS/.models.json
255
TTS/.models.json
|
@ -12,6 +12,61 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"bg": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"cs": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"da": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"et": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ga": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"en": {
|
||||
"ek1": {
|
||||
"tacotron2": {
|
||||
|
@ -79,6 +134,14 @@
|
|||
"license": "apache 2.0",
|
||||
"contact": "egolge@coqui.com"
|
||||
},
|
||||
"vits--neon": {
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--en--ljspeech--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause",
|
||||
"contact": null,
|
||||
"commit": null
|
||||
},
|
||||
"fast_pitch": {
|
||||
"description": "FastPitch model trained on LJSpeech using the Aligner Network",
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip",
|
||||
|
@ -151,18 +214,36 @@
|
|||
"license": "MPL",
|
||||
"contact": "egolge@coqui.com"
|
||||
}
|
||||
}
|
||||
},
|
||||
"css10":{
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fr": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip",
|
||||
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
|
||||
"commit": "",
|
||||
"commit": null,
|
||||
"author": "Eren Gölge @erogol",
|
||||
"license": "MPL",
|
||||
"contact": "egolge@coqui.com"
|
||||
}
|
||||
},
|
||||
"css10":{
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"uk":{
|
||||
|
@ -174,6 +255,13 @@
|
|||
"license": "MIT",
|
||||
"contact": "",
|
||||
"default_vocoder": "vocoder_models/uk/mai/multiband-melgan"
|
||||
},
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -198,6 +286,15 @@
|
|||
"stats_file": null,
|
||||
"commit": "540d811"
|
||||
}
|
||||
},
|
||||
"css10":{
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"de": {
|
||||
|
@ -224,6 +321,15 @@
|
|||
"license": "apache 2.0",
|
||||
"commit": "unknown"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits-neon":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause",
|
||||
"commit": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
|
@ -359,6 +465,149 @@
|
|||
"commit": "1b22f03"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hu": {
|
||||
"css10": {
|
||||
"vits": {
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hu--css10--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"el": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--el--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fi": {
|
||||
"css10": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hr": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lt": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lv": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"mt": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pl": {
|
||||
"mai_female": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pt": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ro": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sk": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sl": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sv": {
|
||||
"cv": {
|
||||
"vits":{
|
||||
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip",
|
||||
"default_vocoder": null,
|
||||
"commit": null,
|
||||
"author": "@NeonGeckoCom",
|
||||
"license": "bsd-3-clause"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"vocoder_models": {
|
||||
|
@ -512,4 +761,4 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -213,7 +213,12 @@ class Synthesizer(object):
|
|||
speaker_embedding = None
|
||||
speaker_id = None
|
||||
if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "name_to_id"):
|
||||
if speaker_name and isinstance(speaker_name, str):
|
||||
|
||||
# handle Neon models with single speaker.
|
||||
if len(self.tts_model.speaker_manager.name_to_id) == 1:
|
||||
speaker_id = list(self.tts_model.speaker_manager.name_to_id.values())[0]
|
||||
|
||||
elif speaker_name and isinstance(speaker_name, str):
|
||||
if self.tts_config.use_d_vector_file:
|
||||
# get the average speaker embedding from the saved d_vectors.
|
||||
speaker_embedding = self.tts_model.speaker_manager.get_mean_embedding(
|
||||
|
@ -243,7 +248,11 @@ class Synthesizer(object):
|
|||
if self.tts_languages_file or (
|
||||
hasattr(self.tts_model, "language_manager") and self.tts_model.language_manager is not None
|
||||
):
|
||||
if language_name and isinstance(language_name, str):
|
||||
|
||||
if len(self.tts_model.language_manager.name_to_id) == 1:
|
||||
language_id = list(self.tts_model.language_manager.name_to_id.values())[0]
|
||||
|
||||
elif language_name and isinstance(language_name, str):
|
||||
language_id = self.tts_model.language_manager.name_to_id[language_name]
|
||||
|
||||
elif not language_name:
|
||||
|
|
|
@ -10,14 +10,13 @@ from TTS.utils.generic_utils import get_user_data_dir
|
|||
from TTS.utils.manage import ModelManager
|
||||
|
||||
|
||||
def test_run_all_models():
|
||||
def run_models(offset=0, step=1):
|
||||
"""Check if all the models are downloadable and tts models run correctly."""
|
||||
print(" > Run synthesizer with all the models.")
|
||||
download_dir = get_user_data_dir("tts")
|
||||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||
manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False)
|
||||
model_names = manager.list_models()
|
||||
for model_name in model_names:
|
||||
for model_name in model_names[offset::step]:
|
||||
print(f"\n > Run - {model_name}")
|
||||
model_path, _, _ = manager.download_model(model_name)
|
||||
if "tts_models" in model_name:
|
||||
|
@ -50,15 +49,27 @@ def test_run_all_models():
|
|||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
|
||||
)
|
||||
# remove downloaded models
|
||||
shutil.rmtree(download_dir)
|
||||
shutil.rmtree(local_download_dir)
|
||||
shutil.rmtree(get_user_data_dir("tts"))
|
||||
else:
|
||||
# only download the model
|
||||
manager.download_model(model_name)
|
||||
print(f" | > OK: {model_name}")
|
||||
|
||||
folders = glob.glob(os.path.join(manager.output_prefix, "*"))
|
||||
assert len(folders) == len(model_names)
|
||||
shutil.rmtree(manager.output_prefix)
|
||||
# folders = glob.glob(os.path.join(manager.output_prefix, "*"))
|
||||
# assert len(folders) == len(model_names) // step
|
||||
|
||||
|
||||
def test_models_offset_0_step_3():
|
||||
run_models(offset=0, step=3)
|
||||
|
||||
|
||||
def test_models_offset_1_step_3():
|
||||
run_models(offset=1, step=3)
|
||||
|
||||
|
||||
def test_models_offset_2_step_3():
|
||||
run_models(offset=2, step=3)
|
||||
|
||||
|
||||
def test_voice_conversion():
|
||||
|
|
Loading…
Reference in New Issue