diff --git a/TTS/bin/distribute.py b/TTS/bin/distribute.py index 97e2f0e3..b5552e32 100644 --- a/TTS/bin/distribute.py +++ b/TTS/bin/distribute.py @@ -35,7 +35,7 @@ def main(): command += unargs command.append("") - # run processes + # run one process per GPU processes = [] for i in range(num_gpus): my_env = os.environ.copy() diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py index a03bfd82..de9e5865 100644 --- a/TTS/bin/eval_encoder.py +++ b/TTS/bin/eval_encoder.py @@ -1,17 +1,18 @@ import argparse -import torch from argparse import RawTextHelpFormatter +import torch from tqdm import tqdm from TTS.config import load_config from TTS.tts.datasets import load_tts_samples from TTS.tts.utils.speakers import SpeakerManager + def compute_encoder_accuracy(dataset_items, encoder_manager): class_name_key = encoder_manager.speaker_encoder_config.class_name_key - map_classid_to_classname = getattr(encoder_manager.speaker_encoder_config, 'map_classid_to_classname', None) + map_classid_to_classname = getattr(encoder_manager.speaker_encoder_config, "map_classid_to_classname", None) class_acc_dict = {} @@ -43,11 +44,11 @@ def compute_encoder_accuracy(dataset_items, encoder_manager): acc_avg = 0 for key, values in class_acc_dict.items(): - acc = sum(values)/len(values) + acc = sum(values) / len(values) print("Class", key, "Accuracy:", acc) acc_avg += acc - print("Average Accuracy:", acc_avg/len(class_acc_dict)) + print("Average Accuracy:", acc_avg / len(class_acc_dict)) if __name__ == "__main__": diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index fe31c510..8b3f53db 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -210,7 +210,13 @@ If you don't specify any models, then it uses LJSpeech based English model. args = parser.parse_args() # print the description if either text or list_models is not set - if not args.text and not args.list_models and not args.list_speaker_idxs and not args.list_language_idxs and not args.reference_wav: + if ( + not args.text + and not args.list_models + and not args.list_speaker_idxs + and not args.list_language_idxs + and not args.reference_wav + ): parser.parse_args(["-h"]) # load model manager @@ -296,7 +302,14 @@ If you don't specify any models, then it uses LJSpeech based English model. 
print(" > Text: {}".format(args.text)) # kick it - wav = synthesizer.tts(args.text, args.speaker_idx, args.language_idx, args.speaker_wav, reference_wav=args.reference_wav, reference_speaker_name=args.reference_speaker_idx) + wav = synthesizer.tts( + args.text, + args.speaker_idx, + args.language_idx, + args.speaker_wav, + reference_wav=args.reference_wav, + reference_speaker_name=args.reference_speaker_idx, + ) # save the results print(" > Saving output to {}".format(args.out_path)) diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index af3e6ec4..b8d38bac 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -9,6 +9,7 @@ import traceback import torch from torch.utils.data import DataLoader from trainer.torch import NoamLR +from trainer.trainer_utils import get_optimizer from TTS.encoder.dataset import EncoderDataset from TTS.encoder.utils.generic_utils import save_best_model, save_checkpoint, setup_speaker_encoder_model @@ -19,7 +20,6 @@ from TTS.tts.datasets import load_tts_samples from TTS.utils.audio import AudioProcessor from TTS.utils.generic_utils import count_parameters, remove_experiment_folder from TTS.utils.io import copy_model_files -from trainer.trainer_utils import get_optimizer from TTS.utils.training import check_update torch.backends.cudnn.enabled = True @@ -52,16 +52,21 @@ def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False sampler = PerfectBatchSampler( dataset.items, classes, - batch_size=num_classes_in_batch*num_utter_per_class, # total batch size + batch_size=num_classes_in_batch * num_utter_per_class, # total batch size num_classes_in_batch=num_classes_in_batch, num_gpus=1, shuffle=not is_val, - drop_last=True) + drop_last=True, + ) if len(classes) < num_classes_in_batch: if is_val: - raise RuntimeError(f"config.eval_num_classes_in_batch ({num_classes_in_batch}) need to be <= {len(classes)} (Number total of Classes in the Eval dataset) !") - raise RuntimeError(f"config.num_classes_in_batch ({num_classes_in_batch}) need to be <= {len(classes)} (Number total of Classes in the Train dataset) !") + raise RuntimeError( + f"config.eval_num_classes_in_batch ({num_classes_in_batch}) need to be <= {len(classes)} (Number total of Classes in the Eval dataset) !" + ) + raise RuntimeError( + f"config.num_classes_in_batch ({num_classes_in_batch}) need to be <= {len(classes)} (Number total of Classes in the Train dataset) !" + ) # set the classes to avoid get wrong class_id when the number of training and eval classes are not equal if is_val: @@ -76,6 +81,7 @@ def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False return loader, classes, dataset.get_map_classid_to_classname() + def evaluation(model, criterion, data_loader, global_step): eval_loss = 0 for _, data in enumerate(data_loader): @@ -84,8 +90,12 @@ def evaluation(model, criterion, data_loader, global_step): inputs, labels = data # agroup samples of each class in the batch. 
perfect sampler produces [3,2,1,3,2,1] we need [3,3,2,2,1,1] - labels = torch.transpose(labels.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch), 0, 1).reshape(labels.shape) - inputs = torch.transpose(inputs.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch, -1), 0, 1).reshape(inputs.shape) + labels = torch.transpose( + labels.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch), 0, 1 + ).reshape(labels.shape) + inputs = torch.transpose( + inputs.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch, -1), 0, 1 + ).reshape(inputs.shape) # dispatch data to GPU if use_cuda: @@ -96,20 +106,23 @@ def evaluation(model, criterion, data_loader, global_step): outputs = model(inputs) # loss computation - loss = criterion(outputs.view(c.eval_num_classes_in_batch, outputs.shape[0] // c.eval_num_classes_in_batch, -1), labels) + loss = criterion( + outputs.view(c.eval_num_classes_in_batch, outputs.shape[0] // c.eval_num_classes_in_batch, -1), labels + ) eval_loss += loss.item() - eval_avg_loss = eval_loss/len(data_loader) + eval_avg_loss = eval_loss / len(data_loader) # save stats dashboard_logger.eval_stats(global_step, {"loss": eval_avg_loss}) # plot the last batch in the evaluation figures = { - "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.num_classes_in_batch), + "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.num_classes_in_batch), } dashboard_logger.eval_figures(global_step, figures) return eval_avg_loss + def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, global_step): model.train() best_loss = float("inf") @@ -124,8 +137,12 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, # setup input data inputs, labels = data # agroup samples of each class in the batch. 
perfect sampler produces [3,2,1,3,2,1] we need [3,3,2,2,1,1] - labels = torch.transpose(labels.view(c.num_utter_per_class, c.num_classes_in_batch), 0, 1).reshape(labels.shape) - inputs = torch.transpose(inputs.view(c.num_utter_per_class, c.num_classes_in_batch, -1), 0, 1).reshape(inputs.shape) + labels = torch.transpose(labels.view(c.num_utter_per_class, c.num_classes_in_batch), 0, 1).reshape( + labels.shape + ) + inputs = torch.transpose(inputs.view(c.num_utter_per_class, c.num_classes_in_batch, -1), 0, 1).reshape( + inputs.shape + ) # ToDo: move it to a unit test # labels_converted = torch.transpose(labels.view(c.num_utter_per_class, c.num_classes_in_batch), 0, 1).reshape(labels.shape) # inputs_converted = torch.transpose(inputs.view(c.num_utter_per_class, c.num_classes_in_batch, -1), 0, 1).reshape(inputs.shape) @@ -157,7 +174,9 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, outputs = model(inputs) # loss computation - loss = criterion(outputs.view(c.num_classes_in_batch, outputs.shape[0] // c.num_classes_in_batch, -1), labels) + loss = criterion( + outputs.view(c.num_classes_in_batch, outputs.shape[0] // c.num_classes_in_batch, -1), labels + ) loss.backward() grad_norm, _ = check_update(model, c.grad_clip) optimizer.step() @@ -211,7 +230,7 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, print( ">>> Epoch:{} AvgLoss: {:.5f} GradNorm:{:.5f} " "EpochTime:{:.2f} AvGLoaderTime:{:.2f} ".format( - epoch, tot_loss/len(data_loader), grad_norm, epoch_time, avg_loader_time + epoch, tot_loss / len(data_loader), grad_norm, epoch_time, avg_loader_time ), flush=True, ) @@ -222,10 +241,8 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, print("\n\n") print("--> EVAL PERFORMANCE") print( - " | > Epoch:{} AvgLoss: {:.5f} ".format( - epoch, eval_loss - ), - flush=True, + " | > Epoch:{} AvgLoss: {:.5f} ".format(epoch, eval_loss), + flush=True, ) # save the best checkpoint best_loss = save_best_model(model, optimizer, criterion, eval_loss, best_loss, OUT_PATH, global_step, epoch) @@ -262,7 +279,9 @@ def main(args): # pylint: disable=redefined-outer-name copy_model_files(c, OUT_PATH) if args.restore_path: - criterion, args.restore_step = model.load_checkpoint(c, args.restore_path, eval=False, use_cuda=use_cuda, criterion=criterion) + criterion, args.restore_step = model.load_checkpoint( + c, args.restore_path, eval=False, use_cuda=use_cuda, criterion=criterion + ) print(" > Model restored from step %d" % args.restore_step, flush=True) else: args.restore_step = 0 diff --git a/TTS/encoder/configs/base_encoder_config.py b/TTS/encoder/configs/base_encoder_config.py index 02b88d66..ebbaa045 100644 --- a/TTS/encoder/configs/base_encoder_config.py +++ b/TTS/encoder/configs/base_encoder_config.py @@ -33,10 +33,7 @@ class BaseEncoderConfig(BaseTrainingConfig): grad_clip: float = 3.0 lr: float = 0.0001 optimizer: str = "radam" - optimizer_params: Dict = field(default_factory=lambda: { - "betas": [0.9, 0.999], - "weight_decay": 0 - }) + optimizer_params: Dict = field(default_factory=lambda: {"betas": [0.9, 0.999], "weight_decay": 0}) lr_decay: bool = False warmup_steps: int = 4000 diff --git a/TTS/encoder/dataset.py b/TTS/encoder/dataset.py index a4db021b..582b1fe9 100644 --- a/TTS/encoder/dataset.py +++ b/TTS/encoder/dataset.py @@ -5,6 +5,7 @@ from torch.utils.data import Dataset from TTS.encoder.utils.generic_utils import AugmentWAV + class EncoderDataset(Dataset): def __init__( self, @@ -57,7 +58,6 @@ class 
EncoderDataset(Dataset): print(f" | > Num Classes: {len(self.classes)}") print(f" | > Classes: {self.classes}") - def load_wav(self, filename): audio = self.ap.load_wav(filename, sr=self.ap.sample_rate) return audio @@ -75,9 +75,7 @@ class EncoderDataset(Dataset): ] # skip classes with number of samples >= self.num_utter_per_class - class_to_utters = { - k: v for (k, v) in class_to_utters.items() if len(v) >= self.num_utter_per_class - } + class_to_utters = {k: v for (k, v) in class_to_utters.items() if len(v) >= self.num_utter_per_class} classes = list(class_to_utters.keys()) classes.sort() @@ -105,11 +103,11 @@ class EncoderDataset(Dataset): def get_class_list(self): return self.classes + def set_classes(self, classes): self.classes = classes self.classname_to_classid = {key: i for i, key in enumerate(self.classes)} - def get_map_classid_to_classname(self): return dict((c_id, c_n) for c_n, c_id in self.classname_to_classid.items()) diff --git a/TTS/encoder/losses.py b/TTS/encoder/losses.py index de65d8d6..5b5aa0fc 100644 --- a/TTS/encoder/losses.py +++ b/TTS/encoder/losses.py @@ -195,6 +195,7 @@ class SoftmaxLoss(nn.Module): class_id = torch.argmax(activations) return class_id + class SoftmaxAngleProtoLoss(nn.Module): """ Implementation of the Softmax AnglePrototypical loss as defined in https://arxiv.org/abs/2009.14153 diff --git a/TTS/encoder/models/base_encoder.py b/TTS/encoder/models/base_encoder.py index c35c636d..ac7d7dd5 100644 --- a/TTS/encoder/models/base_encoder.py +++ b/TTS/encoder/models/base_encoder.py @@ -1,12 +1,13 @@ +import numpy as np import torch import torchaudio -import numpy as np +from coqpit import Coqpit from torch import nn -from TTS.utils.io import load_fsspec from TTS.encoder.losses import AngleProtoLoss, GE2ELoss, SoftmaxAngleProtoLoss from TTS.utils.generic_utils import set_init_dict -from coqpit import Coqpit +from TTS.utils.io import load_fsspec + class PreEmphasis(nn.Module): def __init__(self, coefficient=0.97): @@ -20,6 +21,7 @@ class PreEmphasis(nn.Module): x = torch.nn.functional.pad(x.unsqueeze(1), (1, 0), "reflect") return torch.nn.functional.conv1d(x, self.filter).squeeze(1) + class BaseEncoder(nn.Module): """Base `encoder` class. Every new `encoder` model must inherit this. 
@@ -32,31 +34,31 @@ class BaseEncoder(nn.Module): def get_torch_mel_spectrogram_class(self, audio_config): return torch.nn.Sequential( - PreEmphasis(audio_config["preemphasis"]), - # TorchSTFT( - # n_fft=audio_config["fft_size"], - # hop_length=audio_config["hop_length"], - # win_length=audio_config["win_length"], - # sample_rate=audio_config["sample_rate"], - # window="hamming_window", - # mel_fmin=0.0, - # mel_fmax=None, - # use_htk=True, - # do_amp_to_db=False, - # n_mels=audio_config["num_mels"], - # power=2.0, - # use_mel=True, - # mel_norm=None, - # ) - torchaudio.transforms.MelSpectrogram( - sample_rate=audio_config["sample_rate"], - n_fft=audio_config["fft_size"], - win_length=audio_config["win_length"], - hop_length=audio_config["hop_length"], - window_fn=torch.hamming_window, - n_mels=audio_config["num_mels"], - ) - ) + PreEmphasis(audio_config["preemphasis"]), + # TorchSTFT( + # n_fft=audio_config["fft_size"], + # hop_length=audio_config["hop_length"], + # win_length=audio_config["win_length"], + # sample_rate=audio_config["sample_rate"], + # window="hamming_window", + # mel_fmin=0.0, + # mel_fmax=None, + # use_htk=True, + # do_amp_to_db=False, + # n_mels=audio_config["num_mels"], + # power=2.0, + # use_mel=True, + # mel_norm=None, + # ) + torchaudio.transforms.MelSpectrogram( + sample_rate=audio_config["sample_rate"], + n_fft=audio_config["fft_size"], + win_length=audio_config["win_length"], + hop_length=audio_config["hop_length"], + window_fn=torch.hamming_window, + n_mels=audio_config["num_mels"], + ), + ) @torch.no_grad() def inference(self, x, l2_norm=True): @@ -104,7 +106,9 @@ class BaseEncoder(nn.Module): raise Exception("The %s not is a loss supported" % c.loss) return criterion - def load_checkpoint(self, config: Coqpit, checkpoint_path: str, eval: bool = False, use_cuda: bool = False, criterion=None): + def load_checkpoint( + self, config: Coqpit, checkpoint_path: str, eval: bool = False, use_cuda: bool = False, criterion=None + ): state = load_fsspec(checkpoint_path, map_location=torch.device("cpu")) try: self.load_state_dict(state["model"]) @@ -127,7 +131,12 @@ class BaseEncoder(nn.Module): print(" > Criterion load ignored because of:", error) # instance and load the criterion for the encoder classifier in inference time - if eval and criterion is None and "criterion" in state and getattr(config, 'map_classid_to_classname', None) is not None: + if ( + eval + and criterion is None + and "criterion" in state + and getattr(config, "map_classid_to_classname", None) is not None + ): criterion = self.get_criterion(config, len(config.map_classid_to_classname)) criterion.load_state_dict(state["criterion"]) diff --git a/TTS/encoder/models/resnet.py b/TTS/encoder/models/resnet.py index c4ba9537..84e9967f 100644 --- a/TTS/encoder/models/resnet.py +++ b/TTS/encoder/models/resnet.py @@ -4,6 +4,7 @@ from torch import nn # from TTS.utils.audio import TorchSTFT from TTS.encoder.models.base_encoder import BaseEncoder + class SELayer(nn.Module): def __init__(self, channel, reduction=8): super(SELayer, self).__init__() diff --git a/TTS/encoder/utils/samplers.py b/TTS/encoder/utils/samplers.py index 947f5da0..08256b34 100644 --- a/TTS/encoder/utils/samplers.py +++ b/TTS/encoder/utils/samplers.py @@ -1,4 +1,5 @@ import random + from torch.utils.data.sampler import Sampler, SubsetRandomSampler @@ -34,10 +35,21 @@ class PerfectBatchSampler(Sampler): drop_last (bool): if True, drops last incomplete batch. 
""" - def __init__(self, dataset_items, classes, batch_size, num_classes_in_batch, num_gpus=1, shuffle=True, drop_last=False, label_key="class_name"): + def __init__( + self, + dataset_items, + classes, + batch_size, + num_classes_in_batch, + num_gpus=1, + shuffle=True, + drop_last=False, + label_key="class_name", + ): super().__init__(dataset_items) - assert batch_size % (num_classes_in_batch * num_gpus) == 0, ( - 'Batch size must be divisible by number of classes times the number of data parallel devices (if enabled).') + assert ( + batch_size % (num_classes_in_batch * num_gpus) == 0 + ), "Batch size must be divisible by number of classes times the number of data parallel devices (if enabled)." label_indices = {} for idx, item in enumerate(dataset_items): @@ -93,7 +105,7 @@ class PerfectBatchSampler(Sampler): if groups % self._dp_devices == 0: yield batch else: - batch = batch[:(groups // self._dp_devices) * self._dp_devices * self._num_classes_in_batch] + batch = batch[: (groups // self._dp_devices) * self._dp_devices * self._num_classes_in_batch] if len(batch) > 0: yield batch diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index 222f8519..945c031f 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -7,15 +7,15 @@ import torch.distributed as dist from coqpit import Coqpit from torch import nn from torch.utils.data import DataLoader +from torch.utils.data.sampler import WeightedRandomSampler from trainer.torch import DistributedSampler, DistributedSamplerWrapper from TTS.model import BaseTrainerModel from TTS.tts.datasets.dataset import TTSDataset from TTS.tts.utils.languages import LanguageManager, get_language_balancer_weights -from TTS.tts.utils.speakers import SpeakerManager, get_speaker_manager, get_speaker_balancer_weights +from TTS.tts.utils.speakers import SpeakerManager, get_speaker_balancer_weights, get_speaker_manager from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.visual import plot_alignment, plot_spectrogram -from torch.utils.data.sampler import WeightedRandomSampler # pylint: skip-file @@ -258,7 +258,7 @@ class BaseTTS(BaseTrainerModel): # sampler for DDP if sampler is None: sampler = DistributedSampler(dataset) if num_gpus > 1 else None - else: # If a sampler is already defined use this sampler and DDP sampler together + else: # If a sampler is already defined use this sampler and DDP sampler together sampler = DistributedSamplerWrapper(sampler) if num_gpus > 1 else sampler return sampler diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 818b9a54..afadbadd 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -994,8 +994,11 @@ class Vits(BaseTTS): outputs = {"model_outputs": o, "alignments": attn.squeeze(1), "z": z, "z_p": z_p, "m_p": m_p, "logs_p": logs_p} return outputs + @torch.no_grad() - def inference_voice_conversion(self, reference_wav, speaker_id=None, d_vector=None, reference_speaker_id=None, reference_d_vector=None): + def inference_voice_conversion( + self, reference_wav, speaker_id=None, d_vector=None, reference_speaker_id=None, reference_d_vector=None + ): """Inference for voice conversion Args: @@ -1006,7 +1009,13 @@ class Vits(BaseTTS): reference_d_vector (Tensor): d_vector embedding of the reference_wav speaker. 
Tensor of shape `[B, C]` """ # compute spectrograms - y = wav_to_spec(reference_wav, self.config.audio.fft_size, self.config.audio.hop_length, self.config.audio.win_length, center=False).transpose(1, 2) + y = wav_to_spec( + reference_wav, + self.config.audio.fft_size, + self.config.audio.hop_length, + self.config.audio.win_length, + center=False, + ).transpose(1, 2) y_lengths = torch.tensor([y.size(-1)]).to(y.device) speaker_cond_src = reference_speaker_id if reference_speaker_id is not None else reference_d_vector speaker_cond_tgt = speaker_id if speaker_id is not None else d_vector diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py index 1a5da94a..0227412d 100644 --- a/TTS/tts/utils/speakers.py +++ b/TTS/tts/utils/speakers.py @@ -269,7 +269,9 @@ class SpeakerManager: """ self.speaker_encoder_config = load_config(config_path) self.speaker_encoder = setup_speaker_encoder_model(self.speaker_encoder_config) - self.speaker_encoder_criterion = self.speaker_encoder.load_checkpoint(self.speaker_encoder_config, model_path, eval=True, use_cuda=self.use_cuda) + self.speaker_encoder_criterion = self.speaker_encoder.load_checkpoint( + self.speaker_encoder_config, model_path, eval=True, use_cuda=self.use_cuda + ) self.speaker_encoder_ap = AudioProcessor(**self.speaker_encoder_config.audio) def compute_d_vector_from_clip(self, wav_file: Union[str, List[str]]) -> list: diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py index 582fb4f1..f9e13251 100644 --- a/TTS/tts/utils/synthesis.py +++ b/TTS/tts/utils/synthesis.py @@ -206,6 +206,7 @@ def synthesis( } return return_dict + def transfer_voice( model, CONFIG, @@ -269,12 +270,7 @@ def transfer_voice( _func = model.module.inference_voice_conversion else: _func = model.inference_voice_conversion - model_outputs = _func( - reference_wav, - speaker_id, - d_vector, - reference_speaker_id, - reference_d_vector) + model_outputs = _func(reference_wav, speaker_id, d_vector, reference_speaker_id, reference_d_vector) # convert outputs to numpy # plot results diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 687794b4..2ea23adb 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -119,7 +119,7 @@ class Synthesizer(object): if use_cuda: self.tts_model.cuda() - if self.encoder_checkpoint and hasattr(self.tts_model, "speaker_manager"): + if self.encoder_checkpoint and hasattr(self.tts_model, "speaker_manager"): self.tts_model.speaker_manager.init_speaker_encoder(self.encoder_checkpoint, self.encoder_config) def _set_speaker_encoder_paths_from_tts_config(self): @@ -199,8 +199,8 @@ class Synthesizer(object): if not text and not reference_wav: raise ValueError( - "You need to define either `text` (for sythesis) or a `reference_wav` (for voice conversion) to use the Coqui TTS API." - ) + "You need to define either `text` (for sythesis) or a `reference_wav` (for voice conversion) to use the Coqui TTS API." + ) if text: sens = self.split_into_sentences(text) @@ -214,7 +214,9 @@ class Synthesizer(object): if speaker_name and isinstance(speaker_name, str): if self.tts_config.use_d_vector_file: # get the average speaker embedding from the saved d_vectors. 
- speaker_embedding = self.tts_model.speaker_manager.get_mean_d_vector(speaker_name, num_samples=None, randomize=False) + speaker_embedding = self.tts_model.speaker_manager.get_mean_d_vector( + speaker_name, num_samples=None, randomize=False + ) speaker_embedding = np.array(speaker_embedding)[None, :] # [1 x embedding_dim] else: # get speaker idx from the speaker name @@ -315,25 +317,31 @@ class Synthesizer(object): if reference_speaker_name and isinstance(reference_speaker_name, str): if self.tts_config.use_d_vector_file: # get the speaker embedding from the saved d_vectors. - reference_speaker_embedding = self.tts_model.speaker_manager.get_d_vectors_by_speaker(reference_speaker_name)[0] - reference_speaker_embedding = np.array(reference_speaker_embedding)[None, :] # [1 x embedding_dim] + reference_speaker_embedding = self.tts_model.speaker_manager.get_d_vectors_by_speaker( + reference_speaker_name + )[0] + reference_speaker_embedding = np.array(reference_speaker_embedding)[ + None, : + ] # [1 x embedding_dim] else: # get speaker idx from the speaker name reference_speaker_id = self.tts_model.speaker_manager.speaker_ids[reference_speaker_name] else: - reference_speaker_embedding = self.tts_model.speaker_manager.compute_d_vector_from_clip(reference_wav) + reference_speaker_embedding = self.tts_model.speaker_manager.compute_d_vector_from_clip( + reference_wav + ) outputs = transfer_voice( - model=self.tts_model, - CONFIG=self.tts_config, - use_cuda=self.use_cuda, - reference_wav=reference_wav, - speaker_id=speaker_id, - d_vector=speaker_embedding, - use_griffin_lim=use_gl, - reference_speaker_id=reference_speaker_id, - reference_d_vector=reference_speaker_embedding - ) + model=self.tts_model, + CONFIG=self.tts_config, + use_cuda=self.use_cuda, + reference_wav=reference_wav, + speaker_id=speaker_id, + d_vector=speaker_embedding, + use_griffin_lim=use_gl, + reference_speaker_id=reference_speaker_id, + reference_d_vector=reference_speaker_embedding, + ) waveform = outputs if not use_gl: mel_postnet_spec = outputs[0].detach().cpu().numpy() diff --git a/recipes/ljspeech/hifigan/train_hifigan.py b/recipes/ljspeech/hifigan/train_hifigan.py index 6a739009..b4cbae63 100644 --- a/recipes/ljspeech/hifigan/train_hifigan.py +++ b/recipes/ljspeech/hifigan/train_hifigan.py @@ -41,11 +41,6 @@ model = GAN(config, ap) # init the trainer and 🚀 trainer = Trainer( - TrainerArgs(), - config, - output_path, - model=model, - train_samples=train_samples, - eval_samples=eval_samples + TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples ) trainer.fit() diff --git a/recipes/ljspeech/multiband_melgan/train_multiband_melgan.py b/recipes/ljspeech/multiband_melgan/train_multiband_melgan.py index d5ca9a76..225f5a30 100644 --- a/recipes/ljspeech/multiband_melgan/train_multiband_melgan.py +++ b/recipes/ljspeech/multiband_melgan/train_multiband_melgan.py @@ -41,11 +41,6 @@ model = GAN(config, ap) # init the trainer and 🚀 trainer = Trainer( - TrainerArgs(), - config, - output_path, - model=model, - train_samples=train_samples, - eval_samples=eval_samples + TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples ) trainer.fit() diff --git a/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py b/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py index a0ff8b02..04e6150e 100644 --- a/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py +++ b/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py @@ -84,11 +84,6 @@ model = 
Tacotron2(config, ap, tokenizer, speaker_manager=None) # init the trainer and 🚀 trainer = Trainer( - TrainerArgs(), - config, - output_path, - model=model, - train_samples=train_samples, - eval_samples=eval_samples + TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples ) trainer.fit() diff --git a/recipes/ljspeech/univnet/train.py b/recipes/ljspeech/univnet/train.py index 592b9a76..81d2b889 100644 --- a/recipes/ljspeech/univnet/train.py +++ b/recipes/ljspeech/univnet/train.py @@ -40,11 +40,6 @@ model = GAN(config, ap) # init the trainer and 🚀 trainer = Trainer( - TrainerArgs(), - config, - output_path, - model=model, - train_samples=train_samples, - eval_samples=eval_samples + TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples ) trainer.fit() diff --git a/recipes/multilingual/vits_tts/train_vits_tts.py b/recipes/multilingual/vits_tts/train_vits_tts.py index c4ed0dda..26eb46be 100644 --- a/recipes/multilingual/vits_tts/train_vits_tts.py +++ b/recipes/multilingual/vits_tts/train_vits_tts.py @@ -6,12 +6,11 @@ from trainer import Trainer, TrainerArgs from TTS.config.shared_configs import BaseAudioConfig from TTS.tts.configs.shared_configs import BaseDatasetConfig from TTS.tts.configs.vits_config import VitsConfig -from TTS.tts.models.vits import CharactersConfig from TTS.tts.datasets import load_tts_samples -from TTS.tts.models.vits import Vits, VitsArgs +from TTS.tts.models.vits import CharactersConfig, Vits, VitsArgs from TTS.tts.utils.languages import LanguageManager -from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.speakers import SpeakerManager +from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.utils.audio import AudioProcessor output_path = os.path.dirname(os.path.abspath(__file__)) @@ -131,11 +130,6 @@ model = Vits(config, ap, tokenizer, speaker_manager, language_manager) # init the trainer and 🚀 trainer = Trainer( - TrainerArgs(), - config, - output_path, - model=model, - train_samples=train_samples, - eval_samples=eval_samples + TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples ) trainer.fit() diff --git a/tests/data_tests/test_samplers.py b/tests/data_tests/test_samplers.py index c888c629..42f1bfd5 100644 --- a/tests/data_tests/test_samplers.py +++ b/tests/data_tests/test_samplers.py @@ -1,14 +1,13 @@ import functools - import unittest import torch from TTS.config.shared_configs import BaseDatasetConfig +from TTS.encoder.utils.samplers import PerfectBatchSampler from TTS.tts.datasets import load_tts_samples from TTS.tts.utils.languages import get_language_balancer_weights from TTS.tts.utils.speakers import get_speaker_balancer_weights -from TTS.encoder.utils.samplers import PerfectBatchSampler # Fixing random state to avoid random fails torch.manual_seed(0) @@ -60,7 +59,9 @@ class TestSamplers(unittest.TestCase): assert not is_balanced(en, pt), "Random sampler is supposed to be unbalanced" def test_language_weighted_random_sampler(self): # pylint: disable=no-self-use - weighted_sampler = torch.utils.data.sampler.WeightedRandomSampler(get_language_balancer_weights(train_samples), len(train_samples)) + weighted_sampler = torch.utils.data.sampler.WeightedRandomSampler( + get_language_balancer_weights(train_samples), len(train_samples) + ) ids = functools.reduce(lambda a, b: a + b, [list(weighted_sampler) for i in range(100)]) en, pt = 0, 0 for index in ids: @@ -73,7 +74,9 @@ class 
TestSamplers(unittest.TestCase): def test_speaker_weighted_random_sampler(self): # pylint: disable=no-self-use - weighted_sampler = torch.utils.data.sampler.WeightedRandomSampler(get_speaker_balancer_weights(train_samples), len(train_samples)) + weighted_sampler = torch.utils.data.sampler.WeightedRandomSampler( + get_speaker_balancer_weights(train_samples), len(train_samples) + ) ids = functools.reduce(lambda a, b: a + b, [list(weighted_sampler) for i in range(100)]) spk1, spk2 = 0, 0 for index in ids: @@ -92,11 +95,12 @@ class TestSamplers(unittest.TestCase): sampler = PerfectBatchSampler( train_samples, classes, - batch_size=2 * 3, # total batch size + batch_size=2 * 3, # total batch size num_classes_in_batch=2, label_key="speaker_name", shuffle=False, - drop_last=True) + drop_last=True, + ) batchs = functools.reduce(lambda a, b: a + b, [list(sampler) for i in range(100)]) for batch in batchs: spk1, spk2 = 0, 0 @@ -116,11 +120,12 @@ class TestSamplers(unittest.TestCase): sampler = PerfectBatchSampler( train_samples, classes, - batch_size=2 * 3, # total batch size + batch_size=2 * 3, # total batch size num_classes_in_batch=2, label_key="speaker_name", shuffle=True, - drop_last=False) + drop_last=False, + ) batchs = functools.reduce(lambda a, b: a + b, [list(sampler) for i in range(100)]) for batch in batchs: spk1, spk2 = 0, 0
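For reference, the batch regrouping that train_encoder.py performs in both the train and eval loops ("perfect sampler produces [3,2,1,3,2,1] we need [3,3,2,2,1,1]") is just a view/transpose/reshape round trip. A minimal sketch of that step, assuming toy values in place of c.num_utter_per_class and c.num_classes_in_batch (the tensor contents below are made up purely for illustration):

import torch

# assumed illustration values, standing in for c.num_utter_per_class
# and c.num_classes_in_batch from the training config
num_utter_per_class = 2
num_classes_in_batch = 3

# labels as PerfectBatchSampler emits them: classes interleaved,
# one utterance per class, then the pattern repeats
labels = torch.tensor([3, 2, 1, 3, 2, 1])

# view as [num_utter_per_class, num_classes_in_batch], swap the axes,
# then flatten back: utterances of the same class become adjacent
grouped = torch.transpose(
    labels.view(num_utter_per_class, num_classes_in_batch), 0, 1
).reshape(labels.shape)

print(grouped)  # tensor([3, 3, 2, 2, 1, 1])

The inputs tensor gets the same treatment with a trailing -1 in the view, so the feature dimension rides along unchanged and each embedding stays aligned with its label before the loss reshapes the outputs per class.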