diff --git a/.github/workflows/aux_tests.yml b/.github/workflows/aux_tests.yml index e42b964d..f4cb3ecf 100644 --- a/.github/workflows/aux_tests.yml +++ b/.github/workflows/aux_tests.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/data_tests.yml b/.github/workflows/data_tests.yml index 9ed1333d..3d1e3f8c 100644 --- a/.github/workflows/data_tests.yml +++ b/.github/workflows/data_tests.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/inference_tests.yml b/.github/workflows/inference_tests.yml index 2f6c83bf..47c4b241 100644 --- a/.github/workflows/inference_tests.yml +++ b/.github/workflows/inference_tests.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index fc990826..49a5b300 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -21,7 +21,7 @@ jobs: fi - uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.9 - run: | python -m pip install -U pip setuptools wheel build - run: | @@ -36,7 +36,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 @@ -64,14 +64,6 @@ jobs: with: name: "sdist" path: "dist/" - - uses: actions/download-artifact@v2 - with: - name: "wheel-3.7" - path: "dist/" - - uses: actions/download-artifact@v2 - with: - name: "wheel-3.8" - path: "dist/" - uses: actions/download-artifact@v2 with: name: "wheel-3.9" @@ -80,6 +72,10 @@ jobs: with: name: "wheel-3.10" path: "dist/" + - uses: actions/download-artifact@v2 + with: + name: "wheel-3.11" + path: "dist/" - run: | ls -lh dist/ - name: Setup PyPI config @@ -91,7 +87,7 @@ jobs: EOF - uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.9 - run: | python -m pip install twine - run: | diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index db75e131..c167f7ca 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -42,6 +42,6 @@ jobs: run: | python3 -m pip install .[all] python3 setup.py egg_info - - name: Lint check - run: | - make lint \ No newline at end of file + # - name: Lint check + # run: | + # make lint \ No newline at end of file diff --git a/.github/workflows/text_tests.yml b/.github/workflows/text_tests.yml index 9ae0a058..78d3026d 100644 --- a/.github/workflows/text_tests.yml +++ b/.github/workflows/text_tests.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/tts_tests.yml b/.github/workflows/tts_tests.yml index 6d35171e..5074cded 100644 --- a/.github/workflows/tts_tests.yml +++ b/.github/workflows/tts_tests.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/vocoder_tests.yml b/.github/workflows/vocoder_tests.yml index cfa8e6af..6519ee3f 100644 --- a/.github/workflows/vocoder_tests.yml +++ b/.github/workflows/vocoder_tests.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/zoo_tests0.yml b/.github/workflows/zoo_tests0.yml index d5f4cc14..13f47a93 100644 --- a/.github/workflows/zoo_tests0.yml +++ b/.github/workflows/zoo_tests0.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/zoo_tests1.yml b/.github/workflows/zoo_tests1.yml index 7f15f977..00f13397 100644 --- a/.github/workflows/zoo_tests1.yml +++ b/.github/workflows/zoo_tests1.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 @@ -43,6 +43,7 @@ jobs: run: python3 -m pip install --upgrade pip setuptools wheel - name: Replace scarf urls run: | + sed -i 's/https:\/\/coqui.gateway.scarf.sh\/hf\/bark\//https:\/\/huggingface.co\/erogol\/bark\/resolve\/main\//g' TTS/.models.json sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | diff --git a/.github/workflows/zoo_tests2.yml b/.github/workflows/zoo_tests2.yml index 9975a2cf..310a831a 100644 --- a/.github/workflows/zoo_tests2.yml +++ b/.github/workflows/zoo_tests2.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] experimental: [false] steps: - uses: actions/checkout@v3 diff --git a/TTS/encoder/utils/visual.py b/TTS/encoder/utils/visual.py index f2db2f3f..6575b86e 100644 --- a/TTS/encoder/utils/visual.py +++ b/TTS/encoder/utils/visual.py @@ -23,7 +23,7 @@ colormap = ( [0, 0, 0], [183, 183, 183], ], - dtype=np.float, + dtype=float, ) / 255 ) diff --git a/TTS/tts/configs/bark_config.py b/TTS/tts/configs/bark_config.py index 647116bd..4d1cd137 100644 --- a/TTS/tts/configs/bark_config.py +++ b/TTS/tts/configs/bark_config.py @@ -1,5 +1,5 @@ import os -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict from TTS.tts.configs.shared_configs import BaseTTSConfig @@ -46,11 +46,11 @@ class BarkConfig(BaseTTSConfig): """ model: str = "bark" - audio: BarkAudioConfig = BarkAudioConfig() + audio: BarkAudioConfig = field(default_factory=BarkAudioConfig) num_chars: int = 0 - semantic_config: GPTConfig = GPTConfig() - fine_config: FineGPTConfig = FineGPTConfig() - coarse_config: GPTConfig = GPTConfig() + semantic_config: GPTConfig = field(default_factory=GPTConfig) + fine_config: FineGPTConfig = field(default_factory=FineGPTConfig) + coarse_config: GPTConfig = field(default_factory=GPTConfig) CONTEXT_WINDOW_SIZE: int = 1024 SEMANTIC_RATE_HZ: float = 49.9 SEMANTIC_VOCAB_SIZE: int = 10_000 diff --git a/TTS/tts/configs/fast_pitch_config.py b/TTS/tts/configs/fast_pitch_config.py index 90b15021..d086d265 100644 --- a/TTS/tts/configs/fast_pitch_config.py +++ b/TTS/tts/configs/fast_pitch_config.py @@ -113,7 +113,7 @@ class FastPitchConfig(BaseTTSConfig): base_model: str = "forward_tts" # model specific params - model_args: ForwardTTSArgs = ForwardTTSArgs() + model_args: ForwardTTSArgs = field(default_factory=ForwardTTSArgs) # multi-speaker settings num_speakers: int = 0 diff --git a/TTS/tts/configs/fast_speech_config.py b/TTS/tts/configs/fast_speech_config.py index 16a76e21..af6c2db6 100644 --- a/TTS/tts/configs/fast_speech_config.py +++ b/TTS/tts/configs/fast_speech_config.py @@ -107,7 +107,7 @@ class FastSpeechConfig(BaseTTSConfig): base_model: str = "forward_tts" # model specific params - model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=False) + model_args: ForwardTTSArgs = field(default_factory=lambda: ForwardTTSArgs(use_pitch=False)) # multi-speaker settings num_speakers: int = 0 diff --git a/TTS/tts/configs/fastspeech2_config.py b/TTS/tts/configs/fastspeech2_config.py index 68a3eec2..d179617f 100644 --- a/TTS/tts/configs/fastspeech2_config.py +++ b/TTS/tts/configs/fastspeech2_config.py @@ -123,7 +123,7 @@ class Fastspeech2Config(BaseTTSConfig): base_model: str = "forward_tts" # model specific params - model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=True, use_energy=True) + model_args: ForwardTTSArgs = field(default_factory=lambda: ForwardTTSArgs(use_pitch=True, use_energy=True)) # multi-speaker settings num_speakers: int = 0 diff --git a/TTS/tts/configs/speedy_speech_config.py b/TTS/tts/configs/speedy_speech_config.py index 4bf5101f..bf8517df 100644 --- a/TTS/tts/configs/speedy_speech_config.py +++ b/TTS/tts/configs/speedy_speech_config.py @@ -103,26 +103,28 @@ class SpeedySpeechConfig(BaseTTSConfig): base_model: str = "forward_tts" # set model args as SpeedySpeech - model_args: ForwardTTSArgs = ForwardTTSArgs( - use_pitch=False, - encoder_type="residual_conv_bn", - encoder_params={ - "kernel_size": 4, - "dilations": 4 * [1, 2, 4] + [1], - "num_conv_blocks": 2, - "num_res_blocks": 13, - }, - decoder_type="residual_conv_bn", - decoder_params={ - "kernel_size": 4, - "dilations": 4 * [1, 2, 4, 8] + [1], - "num_conv_blocks": 2, - "num_res_blocks": 17, - }, - out_channels=80, - hidden_channels=128, - positional_encoding=True, - detach_duration_predictor=True, + model_args: ForwardTTSArgs = field( + default_factory=lambda: ForwardTTSArgs( + use_pitch=False, + encoder_type="residual_conv_bn", + encoder_params={ + "kernel_size": 4, + "dilations": 4 * [1, 2, 4] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 13, + }, + decoder_type="residual_conv_bn", + decoder_params={ + "kernel_size": 4, + "dilations": 4 * [1, 2, 4, 8] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 17, + }, + out_channels=80, + hidden_channels=128, + positional_encoding=True, + detach_duration_predictor=True, + ) ) # multi-speaker settings diff --git a/TTS/tts/configs/tortoise_config.py b/TTS/tts/configs/tortoise_config.py index 7da94a4c..d60e43d7 100644 --- a/TTS/tts/configs/tortoise_config.py +++ b/TTS/tts/configs/tortoise_config.py @@ -70,7 +70,7 @@ class TortoiseConfig(BaseTTSConfig): model: str = "tortoise" # model specific params model_args: TortoiseArgs = field(default_factory=TortoiseArgs) - audio: TortoiseAudioConfig = TortoiseAudioConfig() + audio: TortoiseAudioConfig = field(default_factory=TortoiseAudioConfig) model_dir: str = None # settings diff --git a/TTS/tts/layers/bark/hubert/kmeans_hubert.py b/TTS/tts/layers/bark/hubert/kmeans_hubert.py index 7c667755..c7724c23 100644 --- a/TTS/tts/layers/bark/hubert/kmeans_hubert.py +++ b/TTS/tts/layers/bark/hubert/kmeans_hubert.py @@ -10,15 +10,11 @@ License: MIT import logging from pathlib import Path -import fairseq import torch from einops import pack, unpack from torch import nn from torchaudio.functional import resample - -logging.root.setLevel(logging.ERROR) - - +from transformers import HubertModel def round_down_nearest_multiple(num, divisor): return num // divisor * divisor @@ -49,22 +45,11 @@ class CustomHubert(nn.Module): self.target_sample_hz = target_sample_hz self.seq_len_multiple_of = seq_len_multiple_of self.output_layer = output_layer - if device is not None: self.to(device) - - model_path = Path(checkpoint_path) - - assert model_path.exists(), f"path {checkpoint_path} does not exist" - - checkpoint = torch.load(checkpoint_path) - load_model_input = {checkpoint_path: checkpoint} - model, *_ = fairseq.checkpoint_utils.load_model_ensemble_and_task(load_model_input) - + self.model = HubertModel.from_pretrained("facebook/hubert-base-ls960") if device is not None: - model[0].to(device) - - self.model = model[0] + self.model.to(device) self.model.eval() @property @@ -81,19 +66,13 @@ class CustomHubert(nn.Module): if exists(self.seq_len_multiple_of): wav_input = curtail_to_multiple(wav_input, self.seq_len_multiple_of) - embed = self.model( + outputs = self.model.forward( wav_input, - features_only=True, - mask=False, # thanks to @maitycyrus for noticing that mask is defaulted to True in the fairseq code - output_layer=self.output_layer, + output_hidden_states=True, ) - - embed, packed_shape = pack([embed["x"]], "* d") - - # codebook_indices = self.kmeans.predict(embed.cpu().detach().numpy()) - - codebook_indices = torch.from_numpy(embed.cpu().detach().numpy()).to(device) # .long() - + embed = outputs["hidden_states"][self.output_layer] + embed, packed_shape = pack([embed], "* d") + codebook_indices = torch.from_numpy(embed.cpu().detach().numpy()).to(device) if flatten: return codebook_indices diff --git a/TTS/tts/layers/bark/inference_funcs.py b/TTS/tts/layers/bark/inference_funcs.py index fa7a1ebf..da962ab1 100644 --- a/TTS/tts/layers/bark/inference_funcs.py +++ b/TTS/tts/layers/bark/inference_funcs.py @@ -130,7 +130,7 @@ def generate_voice( # generate semantic tokens # Load the HuBERT model hubert_manager = HubertManager() - hubert_manager.make_sure_hubert_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert"]) + # hubert_manager.make_sure_hubert_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert"]) hubert_manager.make_sure_tokenizer_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert_tokenizer"]) hubert_model = CustomHubert(checkpoint_path=model.config.LOCAL_MODEL_PATHS["hubert"]).to(model.device) diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index e12abf20..de5f408c 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -165,7 +165,7 @@ class BCELossMasked(nn.Module): def __init__(self, pos_weight: float = None): super().__init__() - self.pos_weight = nn.Parameter(torch.tensor([pos_weight]), requires_grad=False) + self.register_buffer("pos_weight", torch.tensor([pos_weight])) def forward(self, x, target, length): """ @@ -191,10 +191,15 @@ class BCELossMasked(nn.Module): mask = sequence_mask(sequence_length=length, max_len=target.size(1)) num_items = mask.sum() loss = functional.binary_cross_entropy_with_logits( - x.masked_select(mask), target.masked_select(mask), pos_weight=self.pos_weight, reduction="sum" + x.masked_select(mask), + target.masked_select(mask), + pos_weight=self.pos_weight.to(x.device), + reduction="sum", ) else: - loss = functional.binary_cross_entropy_with_logits(x, target, pos_weight=self.pos_weight, reduction="sum") + loss = functional.binary_cross_entropy_with_logits( + x, target, pos_weight=self.pos_weight.to(x.device), reduction="sum" + ) num_items = torch.numel(x) loss = loss / num_items return loss diff --git a/TTS/tts/utils/helpers.py b/TTS/tts/utils/helpers.py index 56ef2944..c6d1ec2c 100644 --- a/TTS/tts/utils/helpers.py +++ b/TTS/tts/utils/helpers.py @@ -207,7 +207,7 @@ def maximum_path_numpy(value, mask, max_neg_val=None): device = value.device dtype = value.dtype value = value.cpu().detach().numpy() - mask = mask.cpu().detach().numpy().astype(np.bool) + mask = mask.cpu().detach().numpy().astype(bool) b, t_x, t_y = value.shape direction = np.zeros(value.shape, dtype=np.int64) diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py index 579f375c..b0920dc9 100644 --- a/TTS/utils/audio/processor.py +++ b/TTS/utils/audio/processor.py @@ -540,7 +540,10 @@ class AudioProcessor(object): def _griffin_lim(self, S): angles = np.exp(2j * np.pi * np.random.rand(*S.shape)) - S_complex = np.abs(S).astype(np.complex) + try: + S_complex = np.abs(S).astype(np.complex) + except AttributeError: # np.complex is deprecated since numpy 1.20.0 + S_complex = np.abs(S).astype(complex) y = self._istft(S_complex * angles) if not np.isfinite(y).all(): print(" [!] Waveform is not finite everywhere. Skipping the GL.") diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index 78020f91..c1a3d585 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -264,14 +264,17 @@ class ModelManager(object): model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz") self._download_tar_file(model_download_uri, output_path, self.progress_bar) - def set_model_url(self, model_item: Dict): + @staticmethod + def set_model_url(model_item: Dict): model_item["model_url"] = None if "github_rls_url" in model_item: model_item["model_url"] = model_item["github_rls_url"] elif "hf_url" in model_item: model_item["model_url"] = model_item["hf_url"] + elif "fairseq" in model_item["model_name"]: + model_item["model_url"] = "https://coqui.gateway.scarf.sh/fairseq/" return model_item - + def _set_model_item(self, model_name): # fetch model info from the dict model_type, lang, dataset, model = model_name.split("/") @@ -285,10 +288,12 @@ class ModelManager(object): "author": "fairseq", "description": "this model is released by Meta under Fairseq repo. Visit https://github.com/facebookresearch/fairseq/tree/main/examples/mms for more info.", } + model_item["model_name"] = model_name else: # get model from models.json model_item = self.models_dict[model_type][lang][dataset][model] model_item["model_type"] = model_type + model_item = self.set_model_url(model_item) return model_item, model_full_name, model def download_model(self, model_name): @@ -324,7 +329,9 @@ class ModelManager(object): # find downloaded files output_model_path = output_path output_config_path = None - if model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name: # TODO:This is stupid but don't care for now. + if ( + model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name + ): # TODO:This is stupid but don't care for now. output_model_path, output_config_path = self._find_files(output_path) # update paths in the config.json self._update_paths(output_path, output_config_path) diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 4aa26724..ae22ad28 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -794,8 +794,8 @@ class FreeVCConfig(BaseVCConfig): model: str = "freevc" # model specific params - model_args: FreeVCArgs = FreeVCArgs() - audio: FreeVCAudioConfig = FreeVCAudioConfig() + model_args: FreeVCArgs = field(default_factory=FreeVCArgs) + audio: FreeVCAudioConfig = field(default_factory=FreeVCAudioConfig) # optimizer # TODO with training support diff --git a/pyproject.toml b/pyproject.toml index 8bc91b45..8544bb20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools", "wheel", "cython==0.29.28", "numpy==1.21.6", "packaging"] +requires = ["setuptools", "wheel", "cython==0.29.30", "numpy==1.22.0", "packaging"] [flake8] max-line-length=120 diff --git a/requirements.txt b/requirements.txt index c450ff20..111d1b57 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ # core deps -numpy==1.21.6;python_version<"3.10" -numpy;python_version=="3.10" -cython==0.29.28 +numpy==1.22.0;python_version<="3.10" +numpy==1.24.3;python_version>"3.10" +cython==0.29.30 scipy>=1.4.0 torch>=1.7 torchaudio soundfile librosa==0.10.0.* numba==0.55.1;python_version<"3.9" -numba==0.56.4;python_version>="3.9" +numba==0.57.0;python_version>="3.9" inflect==5.6.0 tqdm anyascii @@ -26,14 +26,14 @@ pandas # deps for training matplotlib # coqui stack -trainer==0.0.20 +trainer # config management coqpit>=0.0.16 # chinese g2p deps jieba pypinyin # japanese g2p deps -mecab-python3==1.0.5 +mecab-python3==1.0.6 unidic-lite==1.0.8 # gruut+supported langs gruut[de,es,fr]==2.2.3 @@ -51,5 +51,3 @@ einops transformers #deps for bark encodec -#deps for fairseq models -fairseq diff --git a/setup.cfg b/setup.cfg index 2344c8b2..1f31cb5d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,8 @@ [build_py] -build-lib=temp_build +build_lib=temp_build [bdist_wheel] -bdist-dir=temp_build +bdist_dir=temp_build [install_lib] -build-dir=temp_build +build_dir=temp_build diff --git a/setup.py b/setup.py index 259c3cd1..464bbdd7 100644 --- a/setup.py +++ b/setup.py @@ -32,8 +32,8 @@ from Cython.Build import cythonize from setuptools import Extension, find_packages, setup python_version = sys.version.split()[0] -if Version(python_version) < Version("3.7") or Version(python_version) >= Version("3.11"): - raise RuntimeError("TTS requires python >= 3.7 and < 3.11 " "but your Python version is {}".format(sys.version)) +if Version(python_version) < Version("3.9") or Version(python_version) >= Version("3.12"): + raise RuntimeError("TTS requires python >= 3.9 and < 3.12 " "but your Python version is {}".format(sys.version)) cwd = os.path.dirname(os.path.abspath(__file__)) @@ -114,15 +114,14 @@ setup( "dev": requirements_dev, "notebooks": requirements_notebooks, }, - python_requires=">=3.7.0, <3.11", + python_requires=">=3.9.0, <3.12", entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]}, classifiers=[ "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", "Intended Audience :: Developers", diff --git a/tests/text_tests/test_tokenizer.py b/tests/text_tests/test_tokenizer.py index 6e95c0ad..dfa213d9 100644 --- a/tests/text_tests/test_tokenizer.py +++ b/tests/text_tests/test_tokenizer.py @@ -1,5 +1,5 @@ import unittest -from dataclasses import dataclass +from dataclasses import dataclass, field from coqpit import Coqpit @@ -86,11 +86,11 @@ class TestTTSTokenizer(unittest.TestCase): enable_eos_bos_chars: bool = True use_phonemes: bool = True add_blank: bool = False - characters: str = Characters() + characters: str = field(default_factory=Characters) phonemizer: str = "espeak" phoneme_language: str = "tr" text_cleaner: str = "phoneme_cleaners" - characters = Characters() + characters = field(default_factory=Characters) tokenizer_ph, _ = TTSTokenizer.init_from_config(TokenizerConfig()) tokenizer_ph.phonemizer.backend = "espeak" diff --git a/tests/tts_tests/test_tacotron_model.py b/tests/tts_tests/test_tacotron_model.py index 07351a6a..906ec3d0 100644 --- a/tests/tts_tests/test_tacotron_model.py +++ b/tests/tts_tests/test_tacotron_model.py @@ -16,7 +16,7 @@ from TTS.utils.audio import AudioProcessor torch.manual_seed(1) use_cuda = torch.cuda.is_available() -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +device = torch.device("cuda" if use_cuda else "cpu") config_global = TacotronConfig(num_chars=32, num_speakers=5, out_channels=513, decoder_output_dim=80) @@ -288,7 +288,6 @@ class TacotronCapacitronTrainTest(unittest.TestCase): batch["text_input"].shape[0], batch["stop_targets"].size(1) // config.r, -1 ) batch["stop_targets"] = (batch["stop_targets"].sum(2) > 0.0).unsqueeze(2).float().squeeze() - model = Tacotron(config).to(device) criterion = model.get_criterion() optimizer = model.get_optimizer() diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py index 001f5ef6..d3a83980 100644 --- a/tests/zoo_tests/test_models.py +++ b/tests/zoo_tests/test_models.py @@ -15,7 +15,7 @@ def run_models(offset=0, step=1): print(" > Run synthesizer with all the models.") output_path = os.path.join(get_tests_output_path(), "output.wav") manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False) - model_names = manager.list_models() + model_names = [name for name in manager.list_models() if "bark" not in name] for model_name in model_names[offset::step]: print(f"\n > Run - {model_name}") model_path, _, _ = manager.download_model(model_name) @@ -79,6 +79,15 @@ def test_models_offset_2_step_3(): run_models(offset=2, step=3) +def test_bark(): + """Bark is too big to run on github actions. We need to test it locally""" + output_path = os.path.join(get_tests_output_path(), "output.wav") + run_cli( + f" tts --model_name tts_models/multilingual/multi-dataset/bark " + f'--text "This is an example." --out_path "{output_path}" --progress_bar False' + ) + + def test_voice_conversion(): print(" > Run voice conversion inference using YourTTS model.") model_name = "tts_models/multilingual/multi-dataset/your_tts"