From 9ee48c928698f25a08e10e1a18fe46cf38c35a76 Mon Sep 17 00:00:00 2001
From: Michael Nguyen
Date: Wed, 5 Sep 2018 12:38:03 -0500
Subject: [PATCH] fixed dropout and prenet bug

---
 analyze.py         | 1 +
 hparams.py         | 2 +-
 models/modules.py  | 2 +-
 models/tacotron.py | 4 +++-
 4 files changed, 6 insertions(+), 3 deletions(-)
 create mode 100644 analyze.py

diff --git a/analyze.py b/analyze.py
new file mode 100644
index 0000000..198c055
--- /dev/null
+++ b/analyze.py
@@ -0,0 +1 @@
+# visualisation tools for mimic2
\ No newline at end of file
diff --git a/hparams.py b/hparams.py
index 152ac06..8b7b64a 100644
--- a/hparams.py
+++ b/hparams.py
@@ -34,7 +34,7 @@ hparams = tf.contrib.training.HParams(
   adam_beta2=0.999,
   initial_learning_rate=0.0015,
   learning_rate_decay_halflife=100000,
-  use_cmudict=True,  # Use CMUDict during training to learn pronunciation of ARPAbet phonemes
+  use_cmudict=False,  # Use CMUDict during training to learn pronunciation of ARPAbet phonemes
 
   # Eval:
   max_iters=200,
diff --git a/models/modules.py b/models/modules.py
index c74bf64..eaba17b 100644
--- a/models/modules.py
+++ b/models/modules.py
@@ -8,7 +8,7 @@ def prenet(inputs, is_training, layer_sizes=[256, 128], scope=None):
   with tf.variable_scope(scope or 'prenet'):
     for i, size in enumerate(layer_sizes):
       dense = tf.layers.dense(x, units=size, activation=tf.nn.relu, name='dense_%d' % (i+1))
-      x = tf.layers.dropout(dense, rate=drop_rate, training=True, name='dropout_%d' % (i+1))
+      x = tf.layers.dropout(dense, rate=drop_rate, training=is_training, name='dropout_%d' % (i+1))
   return x
 
 
diff --git a/models/tacotron.py b/models/tacotron.py
index eb50589..a5dc288 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -49,11 +49,13 @@ class Tacotron():
 
       # Attention
       attention_cell = AttentionWrapper(
-        DecoderPrenetWrapper(GRUCell(256), is_training),
+        GRUCell(256),
         LocationSensitiveAttention(256, encoder_outputs),
         alignment_history=True,
         output_attention=False)                                                # [N, T_in, 256]
 
+      attention_cell = DecoderPrenetWrapper(attention_cell, is_training)
+
       # Concatenate attention context vector and RNN cell output into a 512D vector.
       concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)            # [N, T_in, 512]
 
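
For reference, below is a minimal sketch (not part of the patch) of how the prenet in models/modules.py reads once this change is applied, assuming TensorFlow 1.x and a dropout rate of 0.5; the rate is defined outside the hunk shown above, so that value is an assumption here. The point of the fix is that dropout is now gated on is_training instead of being hard-coded on.

import tensorflow as tf


def prenet(inputs, is_training, layer_sizes=[256, 128], scope=None):
  """Dense+ReLU layers, each followed by dropout tied to is_training."""
  x = inputs
  drop_rate = 0.5  # assumed value; not shown in the patched hunk
  with tf.variable_scope(scope or 'prenet'):
    for i, size in enumerate(layer_sizes):
      dense = tf.layers.dense(x, units=size, activation=tf.nn.relu,
                              name='dense_%d' % (i + 1))
      # Previously training=True, so dropout stayed active at inference time;
      # it now follows the is_training flag.
      x = tf.layers.dropout(dense, rate=drop_rate, training=is_training,
                            name='dropout_%d' % (i + 1))
  return x

At graph-build time this would be called with is_training=True for training graphs and is_training=False for synthesis, so the dropout layers become no-ops at inference.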