mirror of https://github.com/coqui-ai/TTS.git
docstrings for common layers
parent a70917a030
commit cc2b1e043d
@@ -1,10 +1,17 @@
import torch
from torch import nn
from torch.nn import functional as F
from scipy.stats import betabinom


class Linear(nn.Module):
    """Linear layer with a specific initialization.

    Args:
        in_features (int): number of channels in the input tensor.
        out_features (int): number of channels in the output tensor.
        bias (bool, optional): enable/disable bias in the layer. Defaults to True.
        init_gain (str, optional): method to compute the gain for weight initialization based on the nonlinear activation used afterwards. Defaults to 'linear'.
    """
    def __init__(self,
                 in_features,
                 out_features,
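The init_gain argument feeds torch.nn.init.calculate_gain (visible further down in this diff), so the weight initialization can be matched to the activation that follows the layer. A minimal usage sketch of that pattern with a plain nn.Linear; the sizes, the 'relu' gain, and the use of Xavier initialization are illustrative assumptions, not taken from the diff:

import torch
from torch import nn

# assumed: Xavier init scaled by the gain of the downstream nonlinearity
linear = nn.Linear(256, 128, bias=True)
nn.init.xavier_uniform_(
    linear.weight,
    gain=nn.init.calculate_gain("relu"))   # e.g. init_gain='relu' if ReLU follows

x = torch.randn(8, 256)        # [B, C]
y = torch.relu(linear(x))      # [B, 128]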
@@ -25,6 +32,16 @@ class Linear(nn.Module):


class LinearBN(nn.Module):
    """Linear layer with Batch Normalization.

    x -> linear -> BN -> o

    Args:
        in_features (int): number of channels in the input tensor.
        out_features (int): number of channels in the output tensor.
        bias (bool, optional): enable/disable bias in the linear layer. Defaults to True.
        init_gain (str, optional): method to set the gain for weight initialization. Defaults to 'linear'.
    """
    def __init__(self,
                 in_features,
                 out_features,
@@ -42,6 +59,10 @@ class LinearBN(nn.Module):
            gain=torch.nn.init.calculate_gain(init_gain))

    def forward(self, x):
        """
        Shapes:
            x: [T, B, C] or [B, C]
        """
        out = self.linear_layer(x)
        if len(out.shape) == 3:
            out = out.permute(1, 2, 0)
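The forward() above permutes 3D inputs because BatchNorm1d expects channels in the second dimension, i.e. [B, C, T]. A standalone sketch of that linear -> BN pattern; the layer sizes are illustrative, and the permute back after the norm is assumed from the [T, B, C] convention since the hunk cuts off before it:

import torch
from torch import nn

linear = nn.Linear(256, 128)
bn = nn.BatchNorm1d(128)

x = torch.randn(50, 8, 256)        # [T, B, C]
out = linear(x)                    # [T, B, 128]
out = bn(out.permute(1, 2, 0))     # BatchNorm1d wants [B, C, T]
out = out.permute(2, 0, 1)         # assumed: restore [T, B, 128]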
@@ -52,6 +73,29 @@ class LinearBN(nn.Module):


class Prenet(nn.Module):
    """Tacotron specific Prenet with an optional Batch Normalization.

    Note:
        Prenet with BN improves the model performance significantly, especially
        if it is enabled after learning a diagonal attention alignment with the
        original prenet. However, if the target dataset is of high quality, it also
        works from the start. It is also suggested to disable dropout if BN is in use.

        prenet_type == "original"
            x -> [linear -> ReLU -> Dropout]xN -> o

        prenet_type == "bn"
            x -> [linear -> BN -> ReLU -> Dropout]xN -> o

    Args:
        in_features (int): number of channels in the input tensor and the inner layers.
        prenet_type (str, optional): prenet type, "original" or "bn". Defaults to "original".
        prenet_dropout (bool, optional): enable/disable dropout in the prenet layers. Defaults to True.
        out_features (list, optional): list of output channels for each prenet block.
            It also defines the number of prenet blocks based on the length of the list.
            Defaults to [256, 256].
        bias (bool, optional): enable/disable bias in prenet linear layers. Defaults to True.
    """
    # pylint: disable=dangerous-default-value
    def __init__(self,
                 in_features,
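Below is a rough, standalone sketch of the two prenet variants the docstring describes, built from plain torch.nn modules with the documented default out_features=[256, 256]. It illustrates the block structure only and is not the repository's Prenet class; in_features=80 and the 0.5 dropout rate are illustrative values not shown in the diff.

import torch
from torch import nn
from torch.nn import functional as F

in_features = 80                   # e.g. mel channels; illustrative value
out_features = [256, 256]          # documented default
sizes = [in_features] + out_features

# "original": [linear -> ReLU -> Dropout] x N
linears = nn.ModuleList(
    nn.Linear(s_in, s_out) for s_in, s_out in zip(sizes[:-1], sizes[1:]))

def prenet_original(x, p=0.5):     # p=0.5 is a common Tacotron choice, assumed here
    for linear in linears:
        x = F.dropout(F.relu(linear(x)), p=p)
    return x

# "bn": [linear -> BN -> ReLU] x N, with dropout typically disabled (see the Note above)
prenet_bn = nn.Sequential(*[
    nn.Sequential(nn.Linear(s_in, s_out), nn.BatchNorm1d(s_out), nn.ReLU())
    for s_in, s_out in zip(sizes[:-1], sizes[1:])])

x = torch.randn(8, in_features)    # [B, C]
print(prenet_original(x).shape)    # torch.Size([8, 256])
print(prenet_bn(x).shape)          # torch.Size([8, 256])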