diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index 96fc11b4..00000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-version: 2
-
-workflows:
-  version: 2
-  test:
-    jobs:
-      - test-3.6
-      - test-3.7
-      - test-3.8
-
-jobs:
-  test-3.6: &test-template
-    docker:
-      - image: circleci/python:3.6
-    resource_class: large
-    working_directory: ~/repo
-    steps:
-      - checkout
-      - run: |
-          sudo apt update
-          sudo apt install espeak-ng git
-      - run: sudo pip install --upgrade pip
-      - run: sudo pip install -e .
-      - run: |
-          sudo pip install --quiet --upgrade cardboardlint pylint
-          cardboardlinter --refspec ${CIRCLE_BRANCH} -n auto
-      - run: nosetests tests --nocapture --processes=0 --process-timeout=20 --process-restartworker
-      - run: |
-          sudo ./tests/test_glow-tts_train.sh
-          sudo ./tests/test_tacotron_train.sh
-          sudo ./tests/test_vocoder_gan_train.sh
-          sudo ./tests/test_vocoder_wavegrad_train.sh
-          sudo ./tests/test_vocoder_wavernn_train.sh
-          sudo ./tests/test_speedy_speech_train.sh
-
-  test-3.7:
-    <<: *test-template
-    docker:
-      - image: circleci/python:3.7
-
-  test-3.8:
-    <<: *test-template
-    docker:
-      - image: circleci/python:3.8
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ebb59216..5e874424 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -35,8 +35,9 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt update
-          sudo apt install -y espeak-ng git
+          sudo apt install -y git make
           sudo apt install -y python3-wheel gcc
+          make system-deps
       - name: Upgrade pip
         # so we can take advantage of pyproject.toml build-dependency support
         run: python3 -m pip install --upgrade pip
@@ -46,17 +47,6 @@ jobs:
           python3 setup.py egg_info
       - name: Lint check
         run: |
-          cardboardlinter
+          make lint
       - name: Unit tests
-        run: nosetests tests --nocapture --processes=0 --process-timeout=20 --process-restartworker
-      - name: Test scripts
-        run: |
-          ./tests/test_demo_server.sh
-          ./tests/test_glow-tts_train.sh
-          ./tests/test_tacotron_train.sh
-          ./tests/test_vocoder_gan_train.sh
-          ./tests/test_vocoder_wavegrad_train.sh
-          ./tests/test_vocoder_wavernn_train.sh
-          ./tests/test_speedy_speech_train.sh
-          ./tests/test_resample.sh
-          ./tests/test_compute_statistics.sh
+        run: make test
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..1ae28644
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,12 @@
+repos:
+  - repo: 'https://github.com/pre-commit/pre-commit-hooks'
+    rev: v2.3.0
+    hooks:
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+  - repo: 'https://github.com/psf/black'
+    rev: 20.8b1
+    hooks:
+      - id: black
+        language_version: python3
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c9d08f37..5b2a28bb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,51 +1,141 @@
 # Contribution guidelines
 
-This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
+Welcome to 🐸TTS!
 
-Before making a Pull Request, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter setup in this repository, so for example, if you've made some changes and would like to run the linter on just the differences between your work and master, you can use the follow command:
+This repository is governed by the Contributor Covenant Code of Conduct - [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md).
 
-```bash
-pip install pylint cardboardlint
-cardboardlinter --refspec master
-```
+## Where to start
+We welcome everyone who likes to contribute to 🐸TTS.
+You can contribute not only with code but with bug reports, comments, questions, answers, or just a simple tweet to spread the word.
 
-This will compare the code against master and run the linter on all the changes. To run it automatically as a git pre-commit hook, you can do do the following:
+If you'd like to contribute code and squash a bug, but you don't know where to start, here are some pointers.
 
-```bash
-cat <<\EOF > .git/hooks/pre-commit
-#!/bin/bash
-if [ ! -x "$(command -v cardboardlinter)" ]; then
-    exit 0
-fi
+- [Github Issues Tracker](https://github.com/coqui-ai/TTS/issues)
 
-# First, stash index and work dir, keeping only the
-# to-be-committed changes in the working directory.
-echo "Stashing working tree changes..." 1>&2
-old_stash=$(git rev-parse -q --verify refs/stash)
-git stash save -q --keep-index
-new_stash=$(git rev-parse -q --verify refs/stash)
+    This is the place to find feature requests and bug reports.
 
-# If there were no changes (e.g., `--amend` or `--allow-empty`)
-# then nothing was stashed, and we should skip everything,
-# including the tests themselves. (Presumably the tests passed
-# on the previous commit, so there is no need to re-run them.)
-if [ "$old_stash" = "$new_stash" ]; then
-    echo "No changes, skipping lint." 1>&2
-    exit 0
-fi
+    Issues with the ```good first issue``` tag are a good place for beginners to take on.
 
-# Run tests
-cardboardlinter --refspec HEAD -n auto
-status=$?
+- ✨**PR**✨ [pages](https://github.com/coqui-ai/TTS/pulls) with the ```🚀new version``` tag.
 
-# Restore changes
-echo "Restoring working tree changes..." 1>&2
-git reset --hard -q && git stash apply --index -q && git stash drop -q
+    We list all the target improvements for the next version there. You can pick one of them and start contributing.
 
-# Exit with status from test-run: nonzero prevents commit
-exit $status
-EOF
-chmod +x .git/hooks/pre-commit
-```
+- Also feel free to suggest new features, ideas and models. We're always open to new things.
+
+## Sending a ✨**PR**✨
 
-This will run the linters on just the changes made in your commit.
\ No newline at end of file
+If you have a new feature, a model to implement, or a bug to squash, go ahead and send a ✨**PR**✨.
+Please use the following steps to send a ✨**PR**✨.
+Let us know if you encounter a problem along the way.
+
+The following steps are tested on an Ubuntu system.
+
+1. Fork [🐸TTS](https://github.com/coqui-ai/TTS) by clicking the fork button at the top right corner of the project page.
+
+2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```.
+
+    ```bash
+    $ git clone git@github.com:<your-github-name>/TTS.git
+    $ cd TTS
+    $ git remote add upstream https://github.com/coqui-ai/TTS.git
+    ```
+
+3. Install 🐸TTS for development.
+
+    ```bash
+    $ make system-deps  # intended to be used on Ubuntu (Debian). Let us know if you have a different OS.
+    $ make install
+    ```
+
+4. Create a new branch with an informative name for your goal.
+
+    ```bash
+    $ git checkout -b an_informative_name_for_my_branch
+    ```
+
+5. Implement your changes on your new branch.
+
+6. Explain your code using [Google Style](https://google.github.io/styleguide/pyguide.html#381-docstrings) docstrings.
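+
+    For instance, a minimal sketch of the expected docstring format (the function and its arguments below are purely illustrative, not a real 🐸TTS API):
+
+    ```python
+    def synthesize(text, speaker_id=None):
+        """Synthesize a waveform for the given text.
+
+        Args:
+            text (str): Input text to synthesize.
+            speaker_id (int, optional): Speaker ID for multi-speaker models. Defaults to None.
+
+        Returns:
+            np.ndarray: The synthesized waveform.
+        """
+    ```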
+
+7. Add your tests to our test suite under the ```tests``` folder. It is important to show that your code works and handles edge cases, and to inform others about the intended use.
+
+8. Run the tests to see how your updates work with the rest of the project. You can repeat this step multiple times as you implement your changes to make sure you are going in the right direction.
+
+    ```bash
+    $ make test
+    ```
+
+9. Format your code. We use ```black``` for code and ```isort``` for ```import``` formatting.
+
+    ```bash
+    $ make style
+    ```
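+
+    A ```.pre-commit-config.yaml``` also ships in the repo root. Optionally, you can use [pre-commit](https://pre-commit.com/) to run its hooks (```black``` plus basic whitespace and YAML checks) automatically on every commit. This is a suggested convenience, not a required step:
+
+    ```bash
+    $ pip install pre-commit
+    $ pre-commit install
+    $ pre-commit run --all-files
+    ```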
+
+10. Run the linter and correct the issues raised. We use ```pylint``` for linting. It helps to enforce a coding standard and offers simple refactoring suggestions.
+
+    ```bash
+    $ make lint
+    ```
+
+11. When things are good, add new files and commit your changes.
+
+    ```bash
+    $ git add my_file1.py my_file2.py ...
+    $ git commit
+    ```
+
+    It's good practice to regularly sync your local copy of the project with the upstream code to keep up with the recent updates.
+
+    ```bash
+    $ git fetch upstream
+    $ git rebase upstream/master
+    # or for the development version
+    $ git rebase upstream/dev
+    ```
+
+12. Send a PR to the ```dev``` branch.
+
+    Push your branch to your fork.
+
+    ```bash
+    $ git push -u origin an_informative_name_for_my_branch
+    ```
+
+    Then go to your fork's Github page and click on 'Pull request' to send your ✨**PR**✨.
+
+    Please set the ✨**PR**✨'s target branch to ```dev```, as we use ```dev``` to work on the next version.
+
+13. Let's discuss until it is perfect. 💪
+
+    We might ask you for certain changes; they will appear on the ✨**PR**✨'s page under [🐸TTS](https://github.com/coqui-ai/TTS/pulls).
+
+14. Once things look perfect, we merge it to the ```dev``` branch and make it ready for the next version.
+
+Feel free to ping us through our communication channels at any step you need help.
+
+If you are new to Github or open-source contribution, these are good resources:
+
+- [Github Docs](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests)
+- [First-Contribution](https://github.com/firstcontributions/first-contributions)
\ No newline at end of file
diff --git a/README.md b/README.md
index 8c6e3b89..c9a283e6 100644
--- a/README.md
+++ b/README.md
@@ -39,12 +39,13 @@ Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly, so that more people can benefit from it.
 
 ## 🔗 Links and Resources
 | Type                            | Links                                    |
 | ------------------------------- | --------------------------------------- |
-| 💾 **Installation**             | [TTS/README.md](https://github.com/coqui-ai/TTS/tree/dev#install-tts)|
-| 👩🏾‍🏫 **Tutorials and Examples** | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/%F0%9F%90%B8-TTS-Notebooks,-Examples-and-Tutorials) |
-| 🚀 **Released Models**          | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
-| 💻 **Docker Image**             | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-coqui-aitts)|
-| 🖥️ **Demo Server**              | [TTS/server](https://github.com/coqui-ai/TTS/tree/master/TTS/server)|
-| 🤖 **Synthesize speech**        | [TTS/README.md](https://github.com/coqui-ai/TTS#example-synthesizing-speech-on-terminal-using-the-released-models)|
+| 💾 **Installation**             | [TTS/README.md](https://github.com/coqui-ai/TTS/tree/dev#install-tts)|
+| 👩‍💻 **Contributing**            | [CONTRIBUTING.md](https://github.com/coqui-ai/TTS/blob/main/CONTRIBUTING.md)|
+| 👩🏾‍🏫 **Tutorials and Examples** | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/%F0%9F%90%B8-TTS-Notebooks,-Examples-and-Tutorials) |
+| 🚀 **Released Models**          | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
+| 💻 **Docker Image**             | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-coqui-aitts)|
+| 🖥️ **Demo Server**              | [TTS/server](https://github.com/coqui-ai/TTS/tree/master/TTS/server)|
+| 🤖 **Synthesize speech**        | [TTS/README.md](https://github.com/coqui-ai/TTS#example-synthesizing-speech-on-terminal-using-the-released-models)|
 
 ## 🥇 TTS Performance

@@ -114,10 +115,19 @@ pip install -e .
 ```
 
 We use ```espeak-ng``` to convert graphemes to phonemes. You might need to install it separately.
+
 ```bash
 sudo apt-get install espeak-ng
 ```
 
+If you are on Ubuntu (Debian), you can also run the following commands for installation.
+
+```bash
+$ make system-deps  # intended to be used on Ubuntu (Debian). Let us know if you have a different OS.
+$ make install
+```
+
+If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/how-can-i-run-mozilla-tts-coqui-tts-training-with-cuda-on-a-windows-system).
 
 ## Directory Structure
 ```
 |- notebooks/ (Jupyter Notebooks for model evaluation, parameter selection and data analysis.)
@@ -167,11 +177,13 @@ Some of the public datasets that we successfully applied 🐸TTS:
 After the installation, 🐸TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under 🐸TTS.
 
 Listing released 🐸TTS models.
+
 ```bash
 tts --list_models
 ```
 
 Run a tts and a vocoder model from the released model list. (Simply copy and paste the full model names from the list as arguments for the command below.)
+
 ```bash
 tts --text "Text for TTS" \
     --model_name "<type>/<language>/<dataset>/<model_name>" \
     --vocoder_name "<type>/<language>/<dataset>/<model_name>" \
     --out_path folder/to/save/output/
 ```
@@ -180,6 +192,7 @@ tts --text "Text for TTS" \
 ```
 
 Run your own TTS model (Using Griffin-Lim Vocoder)
+
 ```bash
 tts --text "Text for TTS" \
     --model_path path/to/model.pth.tar \
     --config_path path/to/config.json \
     --out_path folder/to/save/output/
 ```
@@ -188,6 +201,7 @@ tts --text "Text for TTS" \
 ```
 
 Run your own TTS and Vocoder models
+
 ```bash
 tts --text "Text for TTS" \
     --model_path path/to/config.json \
     --config_path path/to/model.pth.tar \
@@ -242,29 +256,7 @@ In case of any error or intercepted execution, if there is no checkpoint yet under the output folder, the whole folder is going to be removed.
 
 You can also enjoy Tensorboard, if you point Tensorboard argument```--logdir``` to the experiment folder.
 
-## Contribution guidelines
-Please follow the steps below as you send a PR to 🐸. It helps us to keep things organized.
-
-1. Create a new branch.
-2. Implement your changes.
-3. (if applicable) Add [Google Style](https://google.github.io/styleguide/pyguide.html#381-docstrings) docstrings.
-4. (if applicable) Implement a test case under ```tests``` folder.
-5. (Optional but Prefered) Run tests.
-```bash
-./run_tests.sh
-```
-6. Run the ```pylint``` linter.
-```bash
-pip install pylint cardboardlint
-cardboardlinter --refspec master
-```
-7. Send a PR to ```dev``` branch, explain what the change is about.
-8. Let us discuss until we make it perfect :) 💪.
-9. We merge it to the ```dev``` branch once things look good.
-
-Feel free to ping us at any step you need help using our communication channels.
-[Here](https://github.com/firstcontributions/first-contributions) is a good resource for complete beginners.
-
+## [Contribution guidelines](https://github.com/coqui-ai/TTS/blob/main/CONTRIBUTING.md)
 ### Acknowledgement
 - https://github.com/keithito/tacotron (Dataset pre-processing)
 - https://github.com/r9y9/tacotron_pytorch (Initial Tacotron architecture)
diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py
index 16011dda..0a4337da 100644
--- a/TTS/bin/compute_attention_masks.py
+++ b/TTS/bin/compute_attention_masks.py
@@ -16,7 +16,7 @@ from TTS.utils.audio import AudioProcessor
 from TTS.utils.io import load_config
 
 if __name__ == "__main__":
-    # pylint: disable=bad-continuation
+    # pylint: disable=bad-option-value
     parser = argparse.ArgumentParser(
         description="""Extract attention masks from trained Tacotron/Tacotron2 models.
 These masks can be used for different purposes including training a TTS model with a Duration Predictor.\n\n"""
diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py
index d2436c6d..7891d65a 100644
--- a/TTS/bin/find_unique_chars.py
+++ b/TTS/bin/find_unique_chars.py
@@ -7,7 +7,7 @@ from TTS.tts.datasets.preprocess import get_preprocessor_by_name
 
 
 def main():
-    # pylint: disable=bad-continuation
+    # pylint: disable=bad-option-value
     parser = argparse.ArgumentParser(
         description="""Find all the unique characters or phonemes in a dataset.\n\n"""
         """Target dataset must be defined in TTS.tts.datasets.preprocess\n\n"""
diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
index 356196b5..aca245bb 100755
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@@ -23,7 +23,7 @@ def str2bool(v):
 
 
 def main():
-    # pylint: disable=bad-continuation
+    # pylint: disable=bad-option-value
    parser = argparse.ArgumentParser(
         description="""Synthesize speech on command line.\n\n"""
         """You can either use your trained model or choose a model from the provided list.\n\n"""
diff --git a/TTS/speaker_encoder/losses.py b/TTS/speaker_encoder/losses.py
index d683df01..69264ab4 100644
--- a/TTS/speaker_encoder/losses.py
+++ b/TTS/speaker_encoder/losses.py
@@ -16,7 +16,7 @@ class GE2ELoss(nn.Module):
         - init_w (float): defines the initial value of w in Equation (5) of [1]
         - init_b (float): definies the initial value of b in Equation (5) of [1]
         """
-        super(GE2ELoss, self).__init__()
+        super().__init__()
         # pylint: disable=E1102
         self.w = nn.Parameter(torch.tensor(init_w))
         # pylint: disable=E1102
@@ -129,7 +129,7 @@ class AngleProtoLoss(nn.Module):
     """
 
     def __init__(self, init_w=10.0, init_b=-5.0):
-        super(AngleProtoLoss, self).__init__()
+        super().__init__()
         # pylint: disable=E1102
         self.w = nn.Parameter(torch.tensor(init_w))
         # pylint: disable=E1102
diff --git a/TTS/tts/datasets/TTSDataset.py b/TTS/tts/datasets/TTSDataset.py
index 6d055bf7..b613e37c 100644
--- a/TTS/tts/datasets/TTSDataset.py
+++ b/TTS/tts/datasets/TTSDataset.py
@@ -54,7 +54,7 @@ class MyDataset(Dataset):
             use_noise_augment (bool): enable adding random noise to wav for augmentation.
             verbose (bool): print diagnostic information.
""" - super(MyDataset, self).__init__() + super().__init__() self.batch_group_size = batch_group_size self.items = meta_data self.outputs_per_step = outputs_per_step diff --git a/TTS/tts/layers/feed_forward/encoder.py b/TTS/tts/layers/feed_forward/encoder.py index 81ffdeef..caf939ff 100644 --- a/TTS/tts/layers/feed_forward/encoder.py +++ b/TTS/tts/layers/feed_forward/encoder.py @@ -136,18 +136,18 @@ class Encoder(nn.Module): # init encoder if encoder_type.lower() == "relative_position_transformer": # text encoder + # pylint: disable=unexpected-keyword-arg self.encoder = RelativePositionTransformerEncoder( in_hidden_channels, out_channels, in_hidden_channels, encoder_params - ) # pylint: disable=unexpected-keyword-arg + ) elif encoder_type.lower() == "residual_conv_bn": self.encoder = ResidualConv1dBNEncoder(in_hidden_channels, out_channels, in_hidden_channels, encoder_params) elif encoder_type.lower() == "fftransformer": assert ( in_hidden_channels == out_channels ), "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'" - self.encoder = FFTransformerBlock( - in_hidden_channels, **encoder_params - ) # pylint: disable=unexpected-keyword-arg + # pylint: disable=unexpected-keyword-arg + self.encoder = FFTransformerBlock(in_hidden_channels, **encoder_params) else: raise NotImplementedError(" [!] unknown encoder type.") diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index 51772bab..729a21af 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -52,7 +52,7 @@ class L1LossMasked(nn.Module): class MSELossMasked(nn.Module): def __init__(self, seq_len_norm): - super(MSELossMasked, self).__init__() + super().__init__() self.seq_len_norm = seq_len_norm def forward(self, x, target, length): @@ -132,7 +132,7 @@ class AttentionEntropyLoss(nn.Module): class BCELossMasked(nn.Module): def __init__(self, pos_weight): - super(BCELossMasked, self).__init__() + super().__init__() self.pos_weight = pos_weight def forward(self, x, target, length): @@ -193,7 +193,7 @@ class DifferentailSpectralLoss(nn.Module): class GuidedAttentionLoss(torch.nn.Module): def __init__(self, sigma=0.4): - super(GuidedAttentionLoss, self).__init__() + super().__init__() self.sigma = sigma def _make_ga_masks(self, ilens, olens): @@ -247,7 +247,7 @@ class TacotronLoss(torch.nn.Module): """Collection of Tacotron set-up based on provided config.""" def __init__(self, c, stopnet_pos_weight=10, ga_sigma=0.4): - super(TacotronLoss, self).__init__() + super().__init__() self.stopnet_pos_weight = stopnet_pos_weight self.ga_alpha = c.ga_alpha self.decoder_diff_spec_alpha = c.decoder_diff_spec_alpha diff --git a/TTS/tts/layers/tacotron/attentions.py b/TTS/tts/layers/tacotron/attentions.py index cbb643b8..a01ccc49 100644 --- a/TTS/tts/layers/tacotron/attentions.py +++ b/TTS/tts/layers/tacotron/attentions.py @@ -16,7 +16,7 @@ class LocationLayer(nn.Module): """ def __init__(self, attention_dim, attention_n_filters=32, attention_kernel_size=31): - super(LocationLayer, self).__init__() + super().__init__() self.location_conv1d = nn.Conv1d( in_channels=2, out_channels=attention_n_filters, @@ -51,7 +51,7 @@ class GravesAttention(nn.Module): def __init__(self, query_dim, K): - super(GravesAttention, self).__init__() + super().__init__() self._mask_value = 1e-8 self.K = K # self.attention_alignment = 0.05 @@ -178,7 +178,7 @@ class OriginalAttention(nn.Module): trans_agent, forward_attn_mask, ): - super(OriginalAttention, self).__init__() + super().__init__() self.query_layer = 
Linear(query_dim, attention_dim, bias=False, init_gain="tanh") self.inputs_layer = Linear(embedding_dim, attention_dim, bias=False, init_gain="tanh") self.v = Linear(attention_dim, 1, bias=True) diff --git a/TTS/tts/layers/tacotron/common_layers.py b/TTS/tts/layers/tacotron/common_layers.py index e2660cda..d3a9b80d 100644 --- a/TTS/tts/layers/tacotron/common_layers.py +++ b/TTS/tts/layers/tacotron/common_layers.py @@ -14,7 +14,7 @@ class Linear(nn.Module): """ def __init__(self, in_features, out_features, bias=True, init_gain="linear"): - super(Linear, self).__init__() + super().__init__() self.linear_layer = torch.nn.Linear(in_features, out_features, bias=bias) self._init_w(init_gain) @@ -38,7 +38,7 @@ class LinearBN(nn.Module): """ def __init__(self, in_features, out_features, bias=True, init_gain="linear"): - super(LinearBN, self).__init__() + super().__init__() self.linear_layer = torch.nn.Linear(in_features, out_features, bias=bias) self.batch_normalization = nn.BatchNorm1d(out_features, momentum=0.1, eps=1e-5) self._init_w(init_gain) @@ -87,7 +87,7 @@ class Prenet(nn.Module): # pylint: disable=dangerous-default-value def __init__(self, in_features, prenet_type="original", prenet_dropout=True, out_features=[256, 256], bias=True): - super(Prenet, self).__init__() + super().__init__() self.prenet_type = prenet_type self.prenet_dropout = prenet_dropout in_features = [in_features] + out_features[:-1] diff --git a/TTS/tts/layers/tacotron/tacotron.py b/TTS/tts/layers/tacotron/tacotron.py index 95930a05..153af5b7 100644 --- a/TTS/tts/layers/tacotron/tacotron.py +++ b/TTS/tts/layers/tacotron/tacotron.py @@ -26,7 +26,7 @@ class BatchNormConv1d(nn.Module): def __init__(self, in_channels, out_channels, kernel_size, stride, padding, activation=None): - super(BatchNormConv1d, self).__init__() + super().__init__() self.padding = padding self.padder = nn.ConstantPad1d(padding, 0) self.conv1d = nn.Conv1d( @@ -71,7 +71,7 @@ class Highway(nn.Module): # TODO: Try GLU layer def __init__(self, in_features, out_feature): - super(Highway, self).__init__() + super().__init__() self.H = nn.Linear(in_features, out_feature) self.H.bias.data.zero_() self.T = nn.Linear(in_features, out_feature) @@ -118,7 +118,7 @@ class CBHG(nn.Module): gru_features=128, num_highways=4, ): - super(CBHG, self).__init__() + super().__init__() self.in_features = in_features self.conv_bank_features = conv_bank_features self.highway_features = highway_features @@ -191,7 +191,7 @@ class EncoderCBHG(nn.Module): r"""CBHG module with Encoder specific arguments""" def __init__(self): - super(EncoderCBHG, self).__init__() + super().__init__() self.cbhg = CBHG( 128, K=16, @@ -217,7 +217,7 @@ class Encoder(nn.Module): """ def __init__(self, in_features): - super(Encoder, self).__init__() + super().__init__() self.prenet = Prenet(in_features, out_features=[256, 128]) self.cbhg = EncoderCBHG() @@ -230,7 +230,7 @@ class Encoder(nn.Module): class PostCBHG(nn.Module): def __init__(self, mel_dim): - super(PostCBHG, self).__init__() + super().__init__() self.cbhg = CBHG( mel_dim, K=8, @@ -290,7 +290,7 @@ class Decoder(nn.Module): attn_K, separate_stopnet, ): - super(Decoder, self).__init__() + super().__init__() self.r_init = r self.r = r self.in_channels = in_channels @@ -491,7 +491,7 @@ class StopNet(nn.Module): """ def __init__(self, in_features): - super(StopNet, self).__init__() + super().__init__() self.dropout = nn.Dropout(0.1) self.linear = nn.Linear(in_features, 1) torch.nn.init.xavier_uniform_(self.linear.weight, 
gain=torch.nn.init.calculate_gain("linear")) diff --git a/TTS/tts/layers/tacotron/tacotron2.py b/TTS/tts/layers/tacotron/tacotron2.py index 7893cf4a..df14aead 100644 --- a/TTS/tts/layers/tacotron/tacotron2.py +++ b/TTS/tts/layers/tacotron/tacotron2.py @@ -24,7 +24,7 @@ class ConvBNBlock(nn.Module): """ def __init__(self, in_channels, out_channels, kernel_size, activation=None): - super(ConvBNBlock, self).__init__() + super().__init__() assert (kernel_size - 1) % 2 == 0 padding = (kernel_size - 1) // 2 self.convolution1d = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding) @@ -57,7 +57,7 @@ class Postnet(nn.Module): """ def __init__(self, in_out_channels, num_convs=5): - super(Postnet, self).__init__() + super().__init__() self.convolutions = nn.ModuleList() self.convolutions.append(ConvBNBlock(in_out_channels, 512, kernel_size=5, activation="tanh")) for _ in range(1, num_convs - 1): @@ -83,7 +83,7 @@ class Encoder(nn.Module): """ def __init__(self, in_out_channels=512): - super(Encoder, self).__init__() + super().__init__() self.convolutions = nn.ModuleList() for _ in range(3): self.convolutions.append(ConvBNBlock(in_out_channels, in_out_channels, 5, "relu")) @@ -156,7 +156,7 @@ class Decoder(nn.Module): attn_K, separate_stopnet, ): - super(Decoder, self).__init__() + super().__init__() self.frame_channels = frame_channels self.r_init = r self.r = r diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py index 1fb01110..0254149d 100644 --- a/TTS/tts/models/tacotron.py +++ b/TTS/tts/models/tacotron.py @@ -79,7 +79,7 @@ class Tacotron(TacotronAbstract): memory_size=5, gst_use_speaker_embedding=False, ): - super(Tacotron, self).__init__( + super().__init__( num_chars, num_speakers, r, diff --git a/TTS/tts/tf/layers/tacotron/common_layers.py b/TTS/tts/tf/layers/tacotron/common_layers.py index b208d7fe..886f0e61 100644 --- a/TTS/tts/tf/layers/tacotron/common_layers.py +++ b/TTS/tts/tf/layers/tacotron/common_layers.py @@ -11,7 +11,7 @@ from tensorflow.python.ops import math_ops class Linear(keras.layers.Layer): def __init__(self, units, use_bias, **kwargs): - super(Linear, self).__init__(**kwargs) + super().__init__(**kwargs) self.linear_layer = keras.layers.Dense(units, use_bias=use_bias, name="linear_layer") self.activation = keras.layers.ReLU() @@ -25,7 +25,7 @@ class Linear(keras.layers.Layer): class LinearBN(keras.layers.Layer): def __init__(self, units, use_bias, **kwargs): - super(LinearBN, self).__init__(**kwargs) + super().__init__(**kwargs) self.linear_layer = keras.layers.Dense(units, use_bias=use_bias, name="linear_layer") self.batch_normalization = keras.layers.BatchNormalization( axis=-1, momentum=0.90, epsilon=1e-5, name="batch_normalization" @@ -44,7 +44,7 @@ class LinearBN(keras.layers.Layer): class Prenet(keras.layers.Layer): def __init__(self, prenet_type, prenet_dropout, units, bias, **kwargs): - super(Prenet, self).__init__(**kwargs) + super().__init__(**kwargs) self.prenet_type = prenet_type self.prenet_dropout = prenet_dropout self.linear_layers = [] @@ -98,7 +98,7 @@ class Attention(keras.layers.Layer): use_forward_attn_mask, **kwargs, ): - super(Attention, self).__init__(**kwargs) + super().__init__(**kwargs) self.use_loc_attn = use_loc_attn self.loc_attn_n_filters = loc_attn_n_filters self.loc_attn_kernel_size = loc_attn_kernel_size @@ -244,8 +244,7 @@ class Attention(keras.layers.Layer): # location_attention_filters=32, # location_attention_kernel_size=31): -# super(LocationSensitiveAttention, -# self).__init__(units=units, +# super( 
+#             self).__init__(units=units,
 #                            memory=memory,
 #                            memory_sequence_length=memory_sequence_length,
 #                            normalize=normalize,
diff --git a/TTS/tts/tf/layers/tacotron/tacotron2.py b/TTS/tts/tf/layers/tacotron/tacotron2.py
index 0e3b5756..1fe679d2 100644
--- a/TTS/tts/tf/layers/tacotron/tacotron2.py
+++ b/TTS/tts/tf/layers/tacotron/tacotron2.py
@@ -10,7 +10,7 @@ from TTS.tts.tf.utils.tf_utils import shape_list
 
 # pylint: disable=unexpected-keyword-arg
 class ConvBNBlock(keras.layers.Layer):
     def __init__(self, filters, kernel_size, activation, **kwargs):
-        super(ConvBNBlock, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.convolution1d = keras.layers.Conv1D(filters, kernel_size, padding="same", name="convolution1d")
         self.batch_normalization = keras.layers.BatchNormalization(
             axis=2, momentum=0.90, epsilon=1e-5, name="batch_normalization"
@@ -28,7 +28,7 @@ class ConvBNBlock(keras.layers.Layer):
 
 class Postnet(keras.layers.Layer):
     def __init__(self, output_filters, num_convs, **kwargs):
-        super(Postnet, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.convolutions = []
         self.convolutions.append(ConvBNBlock(512, 5, "tanh", name="convolutions_0"))
         for idx in range(1, num_convs - 1):
@@ -44,7 +44,7 @@ class Postnet(keras.layers.Layer):
 
 class Encoder(keras.layers.Layer):
     def __init__(self, output_input_dim, **kwargs):
-        super(Encoder, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.convolutions = []
         for idx in range(3):
             self.convolutions.append(ConvBNBlock(output_input_dim, 5, "relu", name=f"convolutions_{idx}"))
@@ -81,7 +81,7 @@ class Decoder(keras.layers.Layer):
         enable_tflite,
         **kwargs,
     ):
-        super(Decoder, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.frame_dim = frame_dim
         self.r_init = tf.constant(r, dtype=tf.int32)
         self.r = tf.constant(r, dtype=tf.int32)
diff --git a/TTS/tts/tf/models/tacotron2.py b/TTS/tts/tf/models/tacotron2.py
index 2c0e5d6f..e9a73914 100644
--- a/TTS/tts/tf/models/tacotron2.py
+++ b/TTS/tts/tf/models/tacotron2.py
@@ -28,7 +28,7 @@ class Tacotron2(keras.models.Model):
         bidirectional_decoder=False,
         enable_tflite=False,
     ):
-        super(Tacotron2, self).__init__()
+        super().__init__()
         self.r = r
         self.decoder_output_dim = decoder_output_dim
         self.postnet_output_dim = postnet_output_dim
diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py
index 25786d70..cb2827fd 100644
--- a/TTS/tts/utils/speakers.py
+++ b/TTS/tts/utils/speakers.py
@@ -57,7 +57,7 @@ def parse_speakers(c, args, meta_data_train, OUT_PATH):
         prev_out_path = os.path.dirname(args.restore_path)
         speaker_mapping = load_speaker_mapping(prev_out_path)
         speaker_embedding_dim = None
-        assert all([speaker in speaker_mapping for speaker in speakers]), (
+        assert all(speaker in speaker_mapping for speaker in speakers), (
             "As of now you, you cannot "
             "introduce new speakers to "
             "a previously trained model."
) elif ( diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py index 30e5feab..8dd33dc7 100644 --- a/TTS/tts/utils/synthesis.py +++ b/TTS/tts/utils/synthesis.py @@ -1,17 +1,17 @@ import os -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +import numpy as np import pkg_resources +import torch + +from .text import phoneme_to_sequence, text_to_sequence + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" installed = {pkg.key for pkg in pkg_resources.working_set} # pylint: disable=not-an-iterable if "tensorflow" in installed or "tensorflow-gpu" in installed: import tensorflow as tf -import numpy as np -import torch - -from .text import phoneme_to_sequence, text_to_sequence - def text_to_seqvec(text, CONFIG): text_cleaner = [CONFIG.text_cleaner] diff --git a/TTS/tts/utils/visual.py b/TTS/tts/utils/visual.py index b27dca2c..44732322 100644 --- a/TTS/tts/utils/visual.py +++ b/TTS/tts/utils/visual.py @@ -1,13 +1,13 @@ import librosa import matplotlib +import matplotlib.pyplot as plt import numpy as np import torch -matplotlib.use("Agg") -import matplotlib.pyplot as plt - from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme +matplotlib.use("Agg") + def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False): if isinstance(alignment, torch.Tensor): diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index a656efc0..034397a6 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -179,8 +179,8 @@ class AudioProcessor(object): S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm if self.clip_norm: S_norm = np.clip( - S_norm, -self.max_norm, self.max_norm - ) # pylint: disable=invalid-unary-operand-type + S_norm, -self.max_norm, self.max_norm # pylint: disable=invalid-unary-operand-type + ) return S_norm else: S_norm = self.max_norm * S_norm @@ -206,8 +206,8 @@ class AudioProcessor(object): if self.symmetric_norm: if self.clip_norm: S_denorm = np.clip( - S_denorm, -self.max_norm, self.max_norm - ) # pylint: disable=invalid-unary-operand-type + S_denorm, -self.max_norm, self.max_norm # pylint: disable=invalid-unary-operand-type + ) S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db return S_denorm + self.ref_level_db else: @@ -326,13 +326,7 @@ class AudioProcessor(object): ) def _istft(self, y): - return librosa.istft( - y, - hop_length=self.hop_length, - win_length=self.win_length, - window="hann", - center=True, - ) + return librosa.istft(y, hop_length=self.hop_length, win_length=self.win_length) def _griffin_lim(self, S): angles = np.exp(2j * np.pi * np.random.rand(*S.shape)) diff --git a/TTS/utils/distribute.py b/TTS/utils/distribute.py index 7703ab4e..7a1078e8 100644 --- a/TTS/utils/distribute.py +++ b/TTS/utils/distribute.py @@ -14,7 +14,7 @@ class DistributedSampler(Sampler): """ def __init__(self, dataset, num_replicas=None, rank=None): - super(DistributedSampler, self).__init__(dataset) + super().__init__(dataset) if num_replicas is None: if not dist.is_available(): raise RuntimeError("Requires distributed package to be available") diff --git a/TTS/utils/io.py b/TTS/utils/io.py index 93f8b749..84493e07 100644 --- a/TTS/utils/io.py +++ b/TTS/utils/io.py @@ -19,7 +19,7 @@ class AttrDict(dict): to class attributes""" def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.__dict__ = self diff --git a/TTS/utils/radam.py b/TTS/utils/radam.py index 40d8cec9..b6c86fed 100644 --- a/TTS/utils/radam.py +++ b/TTS/utils/radam.py 
@@ -25,10 +25,10 @@ class RAdam(Optimizer): defaults = dict( lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, buffer=[[None, None, None] for _ in range(10)] ) - super(RAdam, self).__init__(params, defaults) + super().__init__(params, defaults) def __setstate__(self, state): # pylint: disable=useless-super-delegation - super(RAdam, self).__setstate__(state) + super().__setstate__(state) def step(self, closure=None): diff --git a/TTS/utils/training.py b/TTS/utils/training.py index a1581b32..37b32637 100644 --- a/TTS/utils/training.py +++ b/TTS/utils/training.py @@ -74,7 +74,7 @@ def set_weight_decay(model, weight_decay, skip_list={"decoder.attention.v", "rnn if not param.requires_grad: continue - if len(param.shape) == 1 or any([skip_name in name for skip_name in skip_list]): + if len(param.shape) == 1 or any((skip_name in name for skip_name in skip_list)): no_decay.append(param) else: decay.append(param) @@ -85,7 +85,7 @@ def set_weight_decay(model, weight_decay, skip_list={"decoder.attention.v", "rnn class NoamLR(torch.optim.lr_scheduler._LRScheduler): def __init__(self, optimizer, warmup_steps=0.1, last_epoch=-1): self.warmup_steps = float(warmup_steps) - super(NoamLR, self).__init__(optimizer, last_epoch) + super().__init__(optimizer, last_epoch) def get_lr(self): step = max(self.last_epoch, 1) diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py index 85d27e8b..05d7d0d7 100644 --- a/TTS/vocoder/datasets/gan_dataset.py +++ b/TTS/vocoder/datasets/gan_dataset.py @@ -30,7 +30,7 @@ class GANDataset(Dataset): use_cache=False, verbose=False, ): - super(GANDataset, self).__init__() + super().__init__() self.ap = ap self.item_list = items self.compute_feat = not isinstance(items[0], (tuple, list)) diff --git a/TTS/vocoder/datasets/wavernn_dataset.py b/TTS/vocoder/datasets/wavernn_dataset.py index 4ab8a174..1596ea8f 100644 --- a/TTS/vocoder/datasets/wavernn_dataset.py +++ b/TTS/vocoder/datasets/wavernn_dataset.py @@ -22,7 +22,7 @@ class WaveRNNDataset(Dataset): verbose=False, ): - super(WaveRNNDataset, self).__init__() + super().__init__() self.ap = ap self.compute_feat = not isinstance(items[0], (tuple, list)) self.item_list = items diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py index a6e438f9..e291a112 100644 --- a/TTS/vocoder/layers/losses.py +++ b/TTS/vocoder/layers/losses.py @@ -20,8 +20,7 @@ class TorchSTFT(nn.Module): # pylint: disable=abstract-method n_mels=80, use_mel=False, ): - """ Torch based STFT operation """ - super(TorchSTFT, self).__init__() + super().__init__() self.n_fft = n_fft self.hop_length = hop_length self.win_length = win_length @@ -91,7 +90,7 @@ class STFTLoss(nn.Module): It is from ParallelWaveGAN paper https://arxiv.org/pdf/1910.11480.pdf""" def __init__(self, n_fft, hop_length, win_length): - super(STFTLoss, self).__init__() + super().__init__() self.n_fft = n_fft self.hop_length = hop_length self.win_length = win_length @@ -113,7 +112,7 @@ class MultiScaleSTFTLoss(torch.nn.Module): It is from ParallelWaveGAN paper https://arxiv.org/pdf/1910.11480.pdf""" def __init__(self, n_ffts=(1024, 2048, 512), hop_lengths=(120, 240, 50), win_lengths=(600, 1200, 240)): - super(MultiScaleSTFTLoss, self).__init__() + super().__init__() self.loss_funcs = torch.nn.ModuleList() for n_fft, hop_length, win_length in zip(n_ffts, hop_lengths, win_lengths): self.loss_funcs.append(STFTLoss(n_fft, hop_length, win_length)) @@ -199,7 +198,7 @@ class MSEDLoss(nn.Module): def __init__( self, ): - super(MSEDLoss, self).__init__() 
+ super().__init__() self.loss_func = nn.MSELoss() # pylint: disable=no-self-use @@ -225,7 +224,7 @@ class MelganFeatureLoss(nn.Module): def __init__( self, ): - super(MelganFeatureLoss, self).__init__() + super().__init__() self.loss_func = nn.L1Loss() # pylint: disable=no-self-use diff --git a/TTS/vocoder/layers/melgan.py b/TTS/vocoder/layers/melgan.py index 67f98c13..7fd999d9 100644 --- a/TTS/vocoder/layers/melgan.py +++ b/TTS/vocoder/layers/melgan.py @@ -4,7 +4,7 @@ from torch.nn.utils import weight_norm class ResidualStack(nn.Module): def __init__(self, channels, num_res_blocks, kernel_size): - super(ResidualStack, self).__init__() + super().__init__() assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd." base_padding = (kernel_size - 1) // 2 diff --git a/TTS/vocoder/layers/parallel_wavegan.py b/TTS/vocoder/layers/parallel_wavegan.py index 427a2f3d..889e8aa6 100644 --- a/TTS/vocoder/layers/parallel_wavegan.py +++ b/TTS/vocoder/layers/parallel_wavegan.py @@ -17,7 +17,7 @@ class ResidualBlock(torch.nn.Module): bias=True, use_causal_conv=False, ): - super(ResidualBlock, self).__init__() + super().__init__() self.dropout = dropout # no future time stamps available if use_causal_conv: diff --git a/TTS/vocoder/layers/pqmf.py b/TTS/vocoder/layers/pqmf.py index 38d28fc1..6253efbb 100644 --- a/TTS/vocoder/layers/pqmf.py +++ b/TTS/vocoder/layers/pqmf.py @@ -8,7 +8,7 @@ from scipy import signal as sig # https://github.com/kan-bayashi/ParallelWaveGAN/tree/master/parallel_wavegan class PQMF(torch.nn.Module): def __init__(self, N=4, taps=62, cutoff=0.15, beta=9.0): - super(PQMF, self).__init__() + super().__init__() self.N = N self.taps = taps diff --git a/TTS/vocoder/layers/upsample.py b/TTS/vocoder/layers/upsample.py index 9bf2a6b4..e169db00 100644 --- a/TTS/vocoder/layers/upsample.py +++ b/TTS/vocoder/layers/upsample.py @@ -4,7 +4,7 @@ from torch.nn import functional as F class Stretch2d(torch.nn.Module): def __init__(self, x_scale, y_scale, mode="nearest"): - super(Stretch2d, self).__init__() + super().__init__() self.x_scale = x_scale self.y_scale = y_scale self.mode = mode @@ -28,7 +28,7 @@ class UpsampleNetwork(torch.nn.Module): freq_axis_kernel_size=1, use_causal_conv=False, ): - super(UpsampleNetwork, self).__init__() + super().__init__() self.use_causal_conv = use_causal_conv self.up_layers = torch.nn.ModuleList() for scale in upsample_factors: @@ -76,7 +76,7 @@ class ConvUpsample(torch.nn.Module): aux_context_window=0, use_causal_conv=False, ): - super(ConvUpsample, self).__init__() + super().__init__() self.aux_context_window = aux_context_window self.use_causal_conv = use_causal_conv and aux_context_window > 0 # To capture wide-context information in conditional features diff --git a/TTS/vocoder/models/melgan_discriminator.py b/TTS/vocoder/models/melgan_discriminator.py index 48499389..14f00c59 100644 --- a/TTS/vocoder/models/melgan_discriminator.py +++ b/TTS/vocoder/models/melgan_discriminator.py @@ -14,7 +14,7 @@ class MelganDiscriminator(nn.Module): downsample_factors=(4, 4, 4, 4), groups_denominator=4, ): - super(MelganDiscriminator, self).__init__() + super().__init__() self.layers = nn.ModuleList() layer_kernel_size = np.prod(kernel_sizes) diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py index ea7a90b1..dabb4baa 100644 --- a/TTS/vocoder/models/melgan_generator.py +++ b/TTS/vocoder/models/melgan_generator.py @@ -16,7 +16,7 @@ class MelganGenerator(nn.Module): res_kernel=3, num_res_blocks=3, ): - super(MelganGenerator, 
self).__init__() + super().__init__() # assert model parameters assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number." diff --git a/TTS/vocoder/models/melgan_multiscale_discriminator.py b/TTS/vocoder/models/melgan_multiscale_discriminator.py index dc907040..33e0a688 100644 --- a/TTS/vocoder/models/melgan_multiscale_discriminator.py +++ b/TTS/vocoder/models/melgan_multiscale_discriminator.py @@ -18,7 +18,7 @@ class MelganMultiscaleDiscriminator(nn.Module): pooling_padding=2, groups_denominator=4, ): - super(MelganMultiscaleDiscriminator, self).__init__() + super().__init__() self.discriminators = nn.ModuleList( [ diff --git a/TTS/vocoder/models/multiband_melgan_generator.py b/TTS/vocoder/models/multiband_melgan_generator.py index 8b61db18..25d65906 100644 --- a/TTS/vocoder/models/multiband_melgan_generator.py +++ b/TTS/vocoder/models/multiband_melgan_generator.py @@ -15,7 +15,7 @@ class MultibandMelganGenerator(MelganGenerator): res_kernel=3, num_res_blocks=3, ): - super(MultibandMelganGenerator, self).__init__( + super().__init__( in_channels=in_channels, out_channels=out_channels, proj_kernel=proj_kernel, diff --git a/TTS/vocoder/models/parallel_wavegan_discriminator.py b/TTS/vocoder/models/parallel_wavegan_discriminator.py index 6cd6a82e..9cc1061c 100644 --- a/TTS/vocoder/models/parallel_wavegan_discriminator.py +++ b/TTS/vocoder/models/parallel_wavegan_discriminator.py @@ -26,7 +26,7 @@ class ParallelWaveganDiscriminator(nn.Module): nonlinear_activation_params={"negative_slope": 0.2}, bias=True, ): - super(ParallelWaveganDiscriminator, self).__init__() + super().__init__() assert (kernel_size - 1) % 2 == 0, " [!] does not support even number kernel size." assert dilation_factor > 0, " [!] dilation factor must be > 0." self.conv_layers = nn.ModuleList() @@ -100,7 +100,7 @@ class ResidualParallelWaveganDiscriminator(nn.Module): nonlinear_activation="LeakyReLU", nonlinear_activation_params={"negative_slope": 0.2}, ): - super(ResidualParallelWaveganDiscriminator, self).__init__() + super().__init__() assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." 
self.in_channels = in_channels diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py index 08e52371..788856cc 100644 --- a/TTS/vocoder/models/parallel_wavegan_generator.py +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -33,7 +33,7 @@ class ParallelWaveganGenerator(torch.nn.Module): inference_padding=2, ): - super(ParallelWaveganGenerator, self).__init__() + super().__init__() self.in_channels = in_channels self.out_channels = out_channels self.aux_channels = aux_channels diff --git a/TTS/vocoder/models/random_window_discriminator.py b/TTS/vocoder/models/random_window_discriminator.py index 0a2e2887..ea95668a 100644 --- a/TTS/vocoder/models/random_window_discriminator.py +++ b/TTS/vocoder/models/random_window_discriminator.py @@ -4,7 +4,7 @@ from torch import nn class GBlock(nn.Module): def __init__(self, in_channels, cond_channels, downsample_factor): - super(GBlock, self).__init__() + super().__init__() self.in_channels = in_channels self.cond_channels = cond_channels @@ -35,7 +35,7 @@ class GBlock(nn.Module): class DBlock(nn.Module): def __init__(self, in_channels, out_channels, downsample_factor): - super(DBlock, self).__init__() + super().__init__() self.in_channels = in_channels self.downsample_factor = downsample_factor @@ -62,7 +62,7 @@ class DBlock(nn.Module): class ConditionalDiscriminator(nn.Module): def __init__(self, in_channels, cond_channels, downsample_factors=(2, 2, 2), out_channels=(128, 256)): - super(ConditionalDiscriminator, self).__init__() + super().__init__() assert len(downsample_factors) == len(out_channels) + 1 @@ -106,7 +106,7 @@ class ConditionalDiscriminator(nn.Module): class UnconditionalDiscriminator(nn.Module): def __init__(self, in_channels, base_channels=64, downsample_factors=(8, 4), out_channels=(128, 256)): - super(UnconditionalDiscriminator, self).__init__() + super().__init__() self.downsample_factors = downsample_factors self.in_channels = in_channels @@ -148,7 +148,7 @@ class RandomWindowDiscriminator(nn.Module): window_sizes=(512, 1024, 2048, 4096, 8192), ): - super(RandomWindowDiscriminator, self).__init__() + super().__init__() self.cond_channels = cond_channels self.window_sizes = window_sizes self.hop_length = hop_length diff --git a/TTS/vocoder/tf/layers/melgan.py b/TTS/vocoder/tf/layers/melgan.py index 36d8724c..90bce6f1 100644 --- a/TTS/vocoder/tf/layers/melgan.py +++ b/TTS/vocoder/tf/layers/melgan.py @@ -3,7 +3,7 @@ import tensorflow as tf class ReflectionPad1d(tf.keras.layers.Layer): def __init__(self, padding): - super(ReflectionPad1d, self).__init__() + super().__init__() self.padding = padding def call(self, x): @@ -12,7 +12,7 @@ class ReflectionPad1d(tf.keras.layers.Layer): class ResidualStack(tf.keras.layers.Layer): def __init__(self, channels, num_res_blocks, kernel_size, name): - super(ResidualStack, self).__init__(name=name) + super().__init__(name=name) assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd." 
 base_padding = (kernel_size - 1) // 2
diff --git a/TTS/vocoder/tf/layers/pqmf.py b/TTS/vocoder/tf/layers/pqmf.py
index 06800b69..81b666b9 100644
--- a/TTS/vocoder/tf/layers/pqmf.py
+++ b/TTS/vocoder/tf/layers/pqmf.py
@@ -5,7 +5,7 @@ from scipy import signal as sig
 
 
 class PQMF(tf.keras.layers.Layer):
     def __init__(self, N=4, taps=62, cutoff=0.15, beta=9.0):
-        super(PQMF, self).__init__()
+        super().__init__()
         # define filter coefficient
         self.N = N
         self.taps = taps
diff --git a/TTS/vocoder/tf/models/melgan_generator.py b/TTS/vocoder/tf/models/melgan_generator.py
index eb8b6eec..90e0fa0c 100644
--- a/TTS/vocoder/tf/models/melgan_generator.py
+++ b/TTS/vocoder/tf/models/melgan_generator.py
@@ -1,13 +1,13 @@
 import logging
 import os
 
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # FATAL
-logging.getLogger("tensorflow").setLevel(logging.FATAL)
-
 import tensorflow as tf
 
 from TTS.vocoder.tf.layers.melgan import ReflectionPad1d, ResidualStack
 
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # FATAL
+logging.getLogger("tensorflow").setLevel(logging.FATAL)
+
 # pylint: disable=too-many-ancestors
 # pylint: disable=abstract-method
@@ -25,7 +25,7 @@ class MelganGenerator(tf.keras.models.Model):
         res_kernel=3,
         num_res_blocks=3,
     ):
-        super(MelganGenerator, self).__init__()
+        super().__init__()
 
         self.in_channels = in_channels
 
diff --git a/TTS/vocoder/tf/models/multiband_melgan_generator.py b/TTS/vocoder/tf/models/multiband_melgan_generator.py
index 51d5cbc3..24d899b2 100644
--- a/TTS/vocoder/tf/models/multiband_melgan_generator.py
+++ b/TTS/vocoder/tf/models/multiband_melgan_generator.py
@@ -17,7 +17,7 @@ class MultibandMelganGenerator(MelganGenerator):
         res_kernel=3,
         num_res_blocks=3,
     ):
-        super(MultibandMelganGenerator, self).__init__(
+        super().__init__(
             in_channels=in_channels,
             out_channels=out_channels,
             proj_kernel=proj_kernel,
diff --git a/hubconf.py b/hubconf.py
index d8589c94..0dd25893 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -1,8 +1,8 @@
 dependencies = ['torch', 'gdown', 'pysbd', 'phonemizer', 'unidecode', 'pypinyin'] # apt install espeak-ng
 import torch
 
-from TTS.utils.synthesizer import Synthesizer
 from TTS.utils.manage import ModelManager
+from TTS.utils.synthesizer import Synthesizer
 
 
 def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', vocoder_name=None, use_cuda=False):
diff --git a/requirements.txt b/requirements.txt
index 42544666..2924f3ae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,12 +16,14 @@ tqdm
 inflect
 bokeh==1.4.0
 pysbd
-# pyworld
 soundfile
-nose==1.3.7
-cardboardlint==1.3.0
-pylint==2.5.3
 gdown
 umap-learn==0.4.6
 cython
-pyyaml
\ No newline at end of file
+pyyaml
+# quality and style
+nose
+coverage
+black
+isort
+pylint==2.7.4
\ No newline at end of file
diff --git a/requirements_tests.txt b/requirements_tests.txt
deleted file mode 100644
index 5b833858..00000000
--- a/requirements_tests.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-torch>=1.5
-tensorflow==2.3.1
-numpy>=1.16.0
-scipy>=0.19.0
-numba==0.48
-librosa==0.7.2
-phonemizer>=2.2.0
-unidecode==0.4.20
-attrdict
-tensorboardX
-matplotlib
-Pillow
-flask
-tqdm
-inflect
-pysbd
-bokeh==1.4.0
-soundfile
-nose==1.3.7
-cardboardlint==1.3.0
-cython
\ No newline at end of file
diff --git a/run_bash_tests.sh b/run_bash_tests.sh
new file mode 100755
index 00000000..16381611
--- /dev/null
+++ b/run_bash_tests.sh
@@ -0,0 +1,14 @@
+set -e
+TF_CPP_MIN_LOG_LEVEL=3
+
+# runtime bash based tests
+./tests/bash_tests/test_demo_server.sh && \
+./tests/bash_tests/test_resample.sh
&& \ +./tests/bash_tests/test_tacotron_train.sh && \ +./tests/bash_tests/test_glow-tts_train.sh && \ +./tests/bash_tests/test_vocoder_gan_train.sh && \ +./tests/bash_tests/test_vocoder_wavernn_train.sh && \ +./tests/bash_tests/test_vocoder_wavegrad_train.sh && \ +./tests/bash_tests/test_speedy_speech_train.sh && \ +./tests/bash_tests/test_aligntts_train.sh && \ +./tests/bash_tests/test_compute_statistics.sh diff --git a/setup.py b/setup.py index de277655..68337644 100644 --- a/setup.py +++ b/setup.py @@ -8,9 +8,8 @@ from distutils.version import LooseVersion import numpy import setuptools.command.build_py import setuptools.command.develop -from setuptools import setup, Extension, find_packages from Cython.Build import cythonize - +from setuptools import Extension, find_packages, setup if LooseVersion(sys.version) < LooseVersion("3.6") or LooseVersion(sys.version) > LooseVersion("3.9"): raise RuntimeError( diff --git a/tests/test_aligntts_train.sh b/tests/bash_tests/test_aligntts_train.sh old mode 100644 new mode 100755 similarity index 61% rename from tests/test_aligntts_train.sh rename to tests/bash_tests/test_aligntts_train.sh index 22e6ff12..38d46520 --- a/tests/test_aligntts_train.sh +++ b/tests/bash_tests/test_aligntts_train.sh @@ -3,11 +3,11 @@ set -xe BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_align_tts.py --config_path $BASEDIR/inputs/test_align_tts.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_align_tts.py --config_path $BASEDIR/../inputs/test_align_tts.json # find the training folder -LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1) echo $LATEST_FOLDER # continue the previous training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_align_tts.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_align_tts.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER # remove all the outputs -rm -rf $BASEDIR/train_outputs/ +rm -rf $BASEDIR/../train_outputs/ diff --git a/tests/test_compute_statistics.sh b/tests/bash_tests/test_compute_statistics.sh similarity index 60% rename from tests/test_compute_statistics.sh rename to tests/bash_tests/test_compute_statistics.sh index c2b32282..d7f0ab9d 100755 --- a/tests/test_compute_statistics.sh +++ b/tests/bash_tests/test_compute_statistics.sh @@ -3,5 +3,5 @@ set -xe BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/compute_statistics.py --config_path $BASEDIR/inputs/test_glow_tts.json --out_path $BASEDIR/outputs/scale_stats.npy +CUDA_VISIBLE_DEVICES="" python TTS/bin/compute_statistics.py --config_path $BASEDIR/../inputs/test_glow_tts.json --out_path $BASEDIR/../outputs/scale_stats.npy diff --git a/tests/test_demo_server.sh b/tests/bash_tests/test_demo_server.sh similarity index 100% rename from tests/test_demo_server.sh rename to tests/bash_tests/test_demo_server.sh diff --git a/tests/test_glow-tts_train.sh b/tests/bash_tests/test_glow-tts_train.sh similarity index 62% rename from tests/test_glow-tts_train.sh rename to tests/bash_tests/test_glow-tts_train.sh index add7292d..04aef2ad 100755 --- a/tests/test_glow-tts_train.sh +++ b/tests/bash_tests/test_glow-tts_train.sh @@ -3,11 +3,11 @@ set -xe BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_glow_tts.py --config_path $BASEDIR/inputs/test_glow_tts.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_glow_tts.py 
--config_path $BASEDIR/../inputs/test_glow_tts.json # find the training folder -LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1) echo $LATEST_FOLDER # continue the previous training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_glow_tts.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_glow_tts.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER # remove all the outputs -rm -rf $BASEDIR/train_outputs/ +rm -rf $BASEDIR/../train_outputs/ diff --git a/tests/test_resample.sh b/tests/bash_tests/test_resample.sh similarity index 78% rename from tests/test_resample.sh rename to tests/bash_tests/test_resample.sh index ddae17ad..ba871272 100755 --- a/tests/test_resample.sh +++ b/tests/bash_tests/test_resample.sh @@ -4,7 +4,7 @@ BASEDIR=$(dirname "$0") TARGET_SR=16000 echo "$BASEDIR" #run the resample script -python TTS/bin/resample.py --input_dir $BASEDIR/data/ljspeech --output_dir $BASEDIR/outputs/resample_tests --output_sr $TARGET_SR +python TTS/bin/resample.py --input_dir $BASEDIR/../data/ljspeech --output_dir $BASEDIR/outputs/resample_tests --output_sr $TARGET_SR #check samplerate of output OUT_SR=$( (echo "import librosa" ; echo "y, sr = librosa.load('"$BASEDIR"/outputs/resample_tests/wavs/LJ001-0012.wav', sr=None)" ; echo "print(sr)") | python ) OUT_SR=$(($OUT_SR + 0)) diff --git a/tests/test_speedy_speech_train.sh b/tests/bash_tests/test_speedy_speech_train.sh similarity index 60% rename from tests/test_speedy_speech_train.sh rename to tests/bash_tests/test_speedy_speech_train.sh index e0c85000..2276034f 100755 --- a/tests/test_speedy_speech_train.sh +++ b/tests/bash_tests/test_speedy_speech_train.sh @@ -3,11 +3,11 @@ set -xe BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_speedy_speech.py --config_path $BASEDIR/inputs/test_speedy_speech.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_speedy_speech.py --config_path $BASEDIR/../inputs/test_speedy_speech.json # find the training folder -LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1) echo $LATEST_FOLDER # continue the previous training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_speedy_speech.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_speedy_speech.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER # remove all the outputs -rm -rf $BASEDIR/train_outputs/ +rm -rf $BASEDIR/../train_outputs/ diff --git a/tests/test_tacotron_train.sh b/tests/bash_tests/test_tacotron_train.sh similarity index 58% rename from tests/test_tacotron_train.sh rename to tests/bash_tests/test_tacotron_train.sh index e0a0253b..4aacf69c 100755 --- a/tests/test_tacotron_train.sh +++ b/tests/bash_tests/test_tacotron_train.sh @@ -4,33 +4,33 @@ BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron_config.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/../inputs/test_tacotron_config.json # find the training folder -LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1) echo $LATEST_FOLDER # continue the previous training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER 
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER # remove all the outputs -rm -rf $BASEDIR/train_outputs/ +rm -rf $BASEDIR/../train_outputs/ # run Tacotron bi-directional decoder -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron_bd_config.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/../inputs/test_tacotron_bd_config.json # find the training folder -LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1) echo $LATEST_FOLDER # continue the previous training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER # remove all the outputs -rm -rf $BASEDIR/train_outputs/ +rm -rf $BASEDIR/../train_outputs/ # Tacotron2 # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron2_config.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/../inputs/test_tacotron2_config.json # find the training folder -LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1) echo $LATEST_FOLDER # continue the previous training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER # remove all the outputs -rm -rf $BASEDIR/train_outputs/ +rm -rf $BASEDIR/../train_outputs/ diff --git a/tests/test_vocoder_gan_train.sh b/tests/bash_tests/test_vocoder_gan_train.sh similarity index 55% rename from tests/test_vocoder_gan_train.sh rename to tests/bash_tests/test_vocoder_gan_train.sh index 0ed2b599..b2f43721 100755 --- a/tests/test_vocoder_gan_train.sh +++ b/tests/bash_tests/test_vocoder_gan_train.sh @@ -3,13 +3,13 @@ set -xe BASEDIR=$(dirname "$0") echo "$BASEDIR" # create run dir -mkdir $BASEDIR/train_outputs +mkdir $BASEDIR/../train_outputs # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/../inputs/test_vocoder_multiband_melgan_config.json # find the training folder -LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1) echo $LATEST_FOLDER # continue the previous training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER # remove all the outputs -rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER +rm -rf $BASEDIR/../train_outputs/$LATEST_FOLDER diff --git a/tests/test_vocoder_wavegrad_train.sh b/tests/bash_tests/test_vocoder_wavegrad_train.sh similarity index 55% rename from tests/test_vocoder_wavegrad_train.sh rename to tests/bash_tests/test_vocoder_wavegrad_train.sh index 33ffe865..9626187f 100755 --- a/tests/test_vocoder_wavegrad_train.sh +++ b/tests/bash_tests/test_vocoder_wavegrad_train.sh @@ -3,13 +3,13 @@ set -xe BASEDIR=$(dirname "$0") echo "$BASEDIR" # create run dir -mkdir -p 
+mkdir -p $BASEDIR/../train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --config_path $BASEDIR/inputs/test_vocoder_wavegrad.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --config_path $BASEDIR/../inputs/test_vocoder_wavegrad.json
 # find the training folder
-LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
+LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER
 # remove all the outputs
-rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
\ No newline at end of file
+rm -rf $BASEDIR/../train_outputs/$LATEST_FOLDER
\ No newline at end of file
diff --git a/tests/test_vocoder_wavernn_train.sh b/tests/bash_tests/test_vocoder_wavernn_train.sh
similarity index 55%
rename from tests/test_vocoder_wavernn_train.sh
rename to tests/bash_tests/test_vocoder_wavernn_train.sh
index 40e86012..7b554fc9 100755
--- a/tests/test_vocoder_wavernn_train.sh
+++ b/tests/bash_tests/test_vocoder_wavernn_train.sh
@@ -3,13 +3,13 @@ set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # create run dir
-mkdir -p $BASEDIR/train_outputs
+mkdir -p $BASEDIR/../train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/../inputs/test_vocoder_wavernn_config.json
 # find the training folder
-LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
+LATEST_FOLDER=$(ls $BASEDIR/../train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/../train_outputs/$LATEST_FOLDER
 # remove all the outputs
-rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
\ No newline at end of file
+rm -rf $BASEDIR/../train_outputs/$LATEST_FOLDER
\ No newline at end of file
diff --git a/tests/inputs/test_align_tts.json b/tests/inputs/test_align_tts.json
index 8815906b..964cc66d 100644
--- a/tests/inputs/test_align_tts.json
+++ b/tests/inputs/test_align_tts.json
@@ -65,7 +65,7 @@
     // MODEL PARAMETERS
     "positional_encoding": true,
     "hidden_channels": 256,
-    "hidden_channels_dp": 128,
+    "hidden_channels_dp": 256,
     "encoder_type": "fftransformer",
     "encoder_params":{
         "hidden_channels_ffn": 1024 ,
@@ -87,7 +87,7 @@
     "eval_batch_size":1,
     "r": 1,  // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
     "loss_masking": true,  // enable / disable loss masking against the sequence padding.
- "phase_start_steps": [0, 40000, 80000, 160000, 170000], + "phase_start_steps": null, // LOSS PARAMETERS diff --git a/tests/test_audio.py b/tests/test_audio.py index 5eb08262..8065383e 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -16,7 +16,7 @@ conf = load_config(os.path.join(get_tests_input_path(), "test_config.json")) # pylint: disable=protected-access class TestAudio(unittest.TestCase): def __init__(self, *args, **kwargs): - super(TestAudio, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.ap = AudioProcessor(**conf.audio) def test_audio_synthesis(self): diff --git a/tests/test_loader.py b/tests/test_loader.py index b7cf7302..6174865b 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -28,7 +28,7 @@ print(" > Dynamic data loader test: {}".format(DATA_EXIST)) class TestTTSDataset(unittest.TestCase): def __init__(self, *args, **kwargs): - super(TestTTSDataset, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.max_loader_iter = 4 self.ap = AudioProcessor(**c.audio)