Use ReLU in the GMM (Graves) attention layer; apply COEF after summing the mixture components

2019-11-08 10:03:58 +01:00 · 2019-11-08 10:03:58 +01:00 · 2966e3f2d1
parent b904bc02d6
commit 2966e3f2d1
1 changed file with 3 additions and 3 deletions
--- a/layers/common_layers.py
+++ b/layers/common_layers.py
@@ -120,7 +120,7 @@ class GravesAttention(nn.Module):
        self.J = None
        self.N_a = nn.Sequential(
            nn.Linear(query_dim, query_dim, bias=True),
-            nn.Tanh(),
+            nn.ReLU(),
            nn.Linear(query_dim, 3*K, bias=True))
        self.attention_weights = None
        self.mu_prev = None
@@ -163,7 +163,7 @@ class GravesAttention(nn.Module):
        sig_t = torch.pow(torch.nn.functional.softplus(b_t), 2)
        mu_t = self.mu_prev + torch.nn.functional.softplus(k_t)
        # TODO try sigmoid here
-        g_t = (torch.softmax(g_t, dim=-1) / sig_t) * self.COEF
+        g_t = (torch.softmax(g_t, dim=-1) / sig_t) 

        # each B x K x T_in
        g_t = g_t.unsqueeze(2).expand(g_t.size(0),
@@ -175,7 +175,7 @@ class GravesAttention(nn.Module):

        # attention weights
        phi_t = g_t * torch.exp(-0.5 * sig_t * (mu_t_ - j)**2)
-        alpha_t = torch.sum(phi_t, 1)
+        alpha_t = self.COEF * torch.sum(phi_t, 1)

        # apply masking
        if mask is not None: