mirror of https://github.com/coqui-ai/TTS.git
docstrings for common layers
parent a70917a030
commit cc2b1e043d
@@ -1,10 +1,17 @@
import torch
from torch import nn
from torch.nn import functional as F
from scipy.stats import betabinom


class Linear(nn.Module):
    """Linear layer with a specific initialization.

    Args:
        in_features (int): number of channels in the input tensor.
        out_features (int): number of channels in the output tensor.
        bias (bool, optional): enable/disable bias in the layer. Defaults to True.
        init_gain (str, optional): method to compute the gain for weight initialization based on the nonlinear activation used afterwards. Defaults to 'linear'.
    """
    def __init__(self,
                 in_features,
                 out_features,
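The init_gain argument feeds torch.nn.init.calculate_gain (visible further down in this diff), so the weight initialization can be matched to the activation that follows the layer. A minimal usage sketch of that pattern with a plain nn.Linear; the sizes, the 'relu' gain, and the use of Xavier initialization are illustrative assumptions, not taken from the diff:

import torch
from torch import nn

# assumed: Xavier init scaled by the gain of the downstream nonlinearity
linear = nn.Linear(256, 128, bias=True)
nn.init.xavier_uniform_(
    linear.weight,
    gain=nn.init.calculate_gain("relu"))   # e.g. init_gain='relu' if ReLU follows

x = torch.randn(8, 256)        # [B, C]
y = torch.relu(linear(x))      # [B, 128]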
@@ -25,6 +32,16 @@ class Linear(nn.Module):


class LinearBN(nn.Module):
    """Linear layer with Batch Normalization.

    x -> linear -> BN -> o

    Args:
        in_features (int): number of channels in the input tensor.
        out_features (int): number of channels in the output tensor.
        bias (bool, optional): enable/disable bias in the linear layer. Defaults to True.
        init_gain (str, optional): method to set the gain for weight initialization. Defaults to 'linear'.
    """
    def __init__(self,
                 in_features,
                 out_features,
@@ -42,6 +59,10 @@ class LinearBN(nn.Module):
            gain=torch.nn.init.calculate_gain(init_gain))

    def forward(self, x):
        """
        Shapes:
            x: [T, B, C] or [B, C]
        """
        out = self.linear_layer(x)
        if len(out.shape) == 3:
            out = out.permute(1, 2, 0)
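The forward() above permutes 3D inputs because BatchNorm1d expects channels in the second dimension, i.e. [B, C, T]. A standalone sketch of that linear -> BN pattern; the layer sizes are illustrative, and the permute back after the norm is assumed from the [T, B, C] convention since the hunk cuts off before it:

import torch
from torch import nn

linear = nn.Linear(256, 128)
bn = nn.BatchNorm1d(128)

x = torch.randn(50, 8, 256)        # [T, B, C]
out = linear(x)                    # [T, B, 128]
out = bn(out.permute(1, 2, 0))     # BatchNorm1d wants [B, C, T]
out = out.permute(2, 0, 1)         # assumed: restore [T, B, 128]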
@@ -52,6 +73,29 @@ class LinearBN(nn.Module):


class Prenet(nn.Module):
    """Tacotron specific Prenet with an optional Batch Normalization.

    Note:
        Prenet with BN improves the model performance significantly, especially
        if it is enabled after learning a diagonal attention alignment with the
        original prenet. However, if the target dataset is of high quality, it also
        works from the start. It is also suggested to disable dropout if BN is in use.

        prenet_type == "original"
            x -> [linear -> ReLU -> Dropout]xN -> o

        prenet_type == "bn"
            x -> [linear -> BN -> ReLU -> Dropout]xN -> o

    Args:
        in_features (int): number of channels in the input tensor and the inner layers.
        prenet_type (str, optional): prenet type, "original" or "bn". Defaults to "original".
        prenet_dropout (bool, optional): enable/disable dropout in the prenet layers. Defaults to True.
        out_features (list, optional): list of output channels for each prenet block.
            It also defines the number of prenet blocks based on the length of the list.
            Defaults to [256, 256].
        bias (bool, optional): enable/disable bias in prenet linear layers. Defaults to True.
    """
    # pylint: disable=dangerous-default-value
    def __init__(self,
                 in_features,
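Below is a rough, standalone sketch of the two prenet variants the docstring describes, built from plain torch.nn modules with the documented default out_features=[256, 256]. It illustrates the block structure only and is not the repository's Prenet class; in_features=80 and the 0.5 dropout rate are illustrative values not shown in the diff.

import torch
from torch import nn
from torch.nn import functional as F

in_features = 80                   # e.g. mel channels; illustrative value
out_features = [256, 256]          # documented default
sizes = [in_features] + out_features

# "original": [linear -> ReLU -> Dropout] x N
linears = nn.ModuleList(
    nn.Linear(s_in, s_out) for s_in, s_out in zip(sizes[:-1], sizes[1:]))

def prenet_original(x, p=0.5):     # p=0.5 is a common Tacotron choice, assumed here
    for linear in linears:
        x = F.dropout(F.relu(linear(x)), p=p)
    return x

# "bn": [linear -> BN -> ReLU] x N, with dropout typically disabled (see the Note above)
prenet_bn = nn.Sequential(*[
    nn.Sequential(nn.Linear(s_in, s_out), nn.BatchNorm1d(s_out), nn.ReLU())
    for s_in, s_out in zip(sizes[:-1], sizes[1:])])

x = torch.randn(8, in_features)    # [B, C]
print(prenet_original(x).shape)    # torch.Size([8, 256])
print(prenet_bn(x).shape)          # torch.Size([8, 256])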