mirror of https://github.com/coqui-ai/TTS.git

docstrings for common layers

parent a70917a030
commit cc2b1e043d
@@ -44,7 +44,7 @@ def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid
    # Use alternative when using output npy file from tune_wavegrad
    # beta = np.load("output-tune-wavegrad.npy", allow_pickle=True).item()
    # vocoder_model.compute_noise_level(beta['beta'])

    device_type = "cuda" if use_cuda else "cpu"
    waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).to(device_type).unsqueeze(0))
    if use_cuda and not use_gl:
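The hunk above is mostly tensor plumbing: the mel spectrogram arrives as [T, C] frames, is transposed to [C, T], moved to the selected device, and given a batch dimension before the vocoder sees it. A minimal sketch of that plumbing with a dummy spectrogram and no real vocoder (all names and shapes here are illustrative, not taken from the repo):

    import torch

    mel_postnet_spec = torch.rand(200, 80).numpy()   # dummy [T, C] spectrogram
    use_cuda = torch.cuda.is_available()
    device_type = "cuda" if use_cuda else "cpu"

    mel = torch.FloatTensor(mel_postnet_spec.T)      # [T, C] -> [C, T]
    mel = mel.to(device_type).unsqueeze(0)           # -> [1, C, T], a batch of one
    print(mel.shape)                                 # torch.Size([1, 80, 200])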
@@ -1,10 +1,17 @@
import torch
from torch import nn
from torch.nn import functional as F
from scipy.stats import betabinom
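The betabinom import is new in this hunk but unused in the lines shown; in TTS codebases scipy's beta-binomial distribution is typically used to build a soft diagonal attention-alignment prior, though that use is only an assumption here. A quick look at what the scipy API provides:

    import numpy as np
    from scipy.stats import betabinom

    # betabinom(n, a, b) is a distribution over {0, ..., n}; evaluating its pmf
    # per decoder step yields one row of a soft alignment band (the prior use is
    # an assumption; the hunk only shows the import).
    T_enc = 10
    prior_row = betabinom.pmf(np.arange(T_enc + 1), T_enc, 2, 5)
    print(prior_row.sum())                           # ~1.0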
class Linear(nn.Module):
    """Linear layer with a specific initialization.

    Args:
        in_features (int): number of channels in the input tensor.
        out_features (int): number of channels in the output tensor.
        bias (bool, optional): enable/disable bias in the layer. Defaults to True.
        init_gain (str, optional): method to compute the gain in the weight
            initialization based on the nonlinear activation used afterwards. Defaults to 'linear'.
    """
    def __init__(self,
                 in_features,
                 out_features,
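The hunk truncates the class body at the __init__ signature. A minimal sketch of what a layer matching this docstring plausibly looks like; the xavier-uniform initializer is an assumption, since the diff never shows the body:

    import torch
    from torch import nn

    class Linear(nn.Module):
        # Sketch only: the xavier_uniform_ choice is assumed, not shown in the hunk.
        def __init__(self, in_features, out_features, bias=True, init_gain='linear'):
            super().__init__()
            self.linear_layer = nn.Linear(in_features, out_features, bias=bias)
            # Scale the init by the gain of the activation that follows the layer.
            torch.nn.init.xavier_uniform_(
                self.linear_layer.weight,
                gain=torch.nn.init.calculate_gain(init_gain))

        def forward(self, x):
            return self.linear_layer(x)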
@@ -25,6 +32,16 @@ class Linear(nn.Module):
class LinearBN(nn.Module):
    """Linear layer with Batch Normalization.

    x -> linear -> BN -> o

    Args:
        in_features (int): number of channels in the input tensor.
        out_features (int): number of channels in the output tensor.
        bias (bool, optional): enable/disable bias in the linear layer. Defaults to True.
        init_gain (str, optional): method to set the gain for weight initialization. Defaults to 'linear'.
    """
    def __init__(self,
                 in_features,
                 out_features,
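Again the body is cut off. A sketch of the x -> linear -> BN -> o block the docstring describes, with the forward tail completed from the shape contract in the next hunk (attribute names other than linear_layer, and the permute back to [T, B, C], are assumptions):

    import torch
    from torch import nn

    class LinearBN(nn.Module):
        # Sketch only: composes the Linear idea above with BatchNorm1d.
        def __init__(self, in_features, out_features, bias=True, init_gain='linear'):
            super().__init__()
            self.linear_layer = nn.Linear(in_features, out_features, bias=bias)
            self.batch_normalization = nn.BatchNorm1d(out_features)
            torch.nn.init.xavier_uniform_(
                self.linear_layer.weight,
                gain=torch.nn.init.calculate_gain(init_gain))

        def forward(self, x):
            out = self.linear_layer(x)
            if len(out.shape) == 3:
                out = out.permute(1, 2, 0)    # [T, B, C] -> [B, C, T] for BatchNorm1d
            out = self.batch_normalization(out)
            if len(out.shape) == 3:
                out = out.permute(2, 0, 1)    # [B, C, T] -> [T, B, C] again (assumed)
            return out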
@@ -42,6 +59,10 @@ class LinearBN(nn.Module):
            gain=torch.nn.init.calculate_gain(init_gain))

    def forward(self, x):
        """
        Shapes:
            x: [T, B, C] or [B, C]
        """
        out = self.linear_layer(x)
        if len(out.shape) == 3:
            out = out.permute(1, 2, 0)
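The permute exists because nn.BatchNorm1d normalizes over dimension 1, so a [T, B, C] sequence has to be viewed as [B, C, T] first. A quick shape check against the docstring's contract, using the LinearBN sketch above:

    layer = LinearBN(80, 256)
    layer.eval()                      # skip train-mode batch statistics in a toy check
    x3 = torch.rand(50, 2, 80)        # [T, B, C]
    x2 = torch.rand(4, 80)            # [B, C]
    print(layer(x3).shape)            # torch.Size([50, 2, 256])
    print(layer(x2).shape)            # torch.Size([4, 256])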
@@ -52,6 +73,29 @@ class LinearBN(nn.Module):
class Prenet(nn.Module):
    """Tacotron-specific Prenet with an optional Batch Normalization.

    Note:
        Prenet with BN improves the model performance significantly, especially
        if it is enabled after learning a diagonal attention alignment with the
        original prenet. However, if the target dataset is high quality, then it
        also works from the start. It is also suggested to disable dropout if BN
        is in use.

        prenet_type == "original"
            x -> [linear -> ReLU -> Dropout]xN -> o

        prenet_type == "bn"
            x -> [linear -> BN -> ReLU -> Dropout]xN -> o

    Args:
        in_features (int): number of channels in the input tensor and the inner layers.
        prenet_type (str, optional): prenet type, "original" or "bn". Defaults to "original".
        prenet_dropout (bool, optional): enable/disable dropout. Defaults to True.
        out_features (list, optional): list of output channels for each prenet block.
            It also defines the number of prenet blocks based on the length of the
            argument list. Defaults to [256, 256].
        bias (bool, optional): enable/disable bias in prenet linear layers. Defaults to True.
    """
    # pylint: disable=dangerous-default-value
    def __init__(self,
                 in_features,
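The __init__ is truncated once more. A minimal sketch of a prenet matching the docstring, reusing the Linear and LinearBN sketches above (the exact block wiring and the p=0.5 dropout rate are assumptions, not shown in the hunk):

    from torch import nn
    from torch.nn import functional as F

    class Prenet(nn.Module):
        # pylint: disable=dangerous-default-value
        def __init__(self, in_features, prenet_type="original", prenet_dropout=True,
                     out_features=[256, 256], bias=True):
            super().__init__()
            self.prenet_dropout = prenet_dropout
            # Chain the blocks: each block consumes the previous block's output size.
            in_sizes = [in_features] + out_features[:-1]
            layer_cls = LinearBN if prenet_type == "bn" else Linear
            self.linear_layers = nn.ModuleList([
                layer_cls(in_size, out_size, bias=bias)
                for in_size, out_size in zip(in_sizes, out_features)])

        def forward(self, x):
            for linear in self.linear_layers:
                x = F.relu(linear(x))
                if self.prenet_dropout:
                    # p=0.5 is the usual Tacotron prenet value; assumed here.
                    x = F.dropout(x, p=0.5, training=self.training)
            return x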