From 9ee48c928698f25a08e10e1a18fe46cf38c35a76 Mon Sep 17 00:00:00 2001
From: Michael Nguyen
Date: Wed, 5 Sep 2018 12:38:03 -0500
Subject: [PATCH] fixed dropout and prenet bug

---
 analyze.py         | 1 +
 hparams.py         | 2 +-
 models/modules.py  | 2 +-
 models/tacotron.py | 4 +++-
 4 files changed, 6 insertions(+), 3 deletions(-)
 create mode 100644 analyze.py

diff --git a/analyze.py b/analyze.py
new file mode 100644
index 0000000..198c055
--- /dev/null
+++ b/analyze.py
@@ -0,0 +1 @@
+# visualisation tools for mimic2
\ No newline at end of file
diff --git a/hparams.py b/hparams.py
index 152ac06..8b7b64a 100644
--- a/hparams.py
+++ b/hparams.py
@@ -34,7 +34,7 @@ hparams = tf.contrib.training.HParams(
   adam_beta2=0.999,
   initial_learning_rate=0.0015,
   learning_rate_decay_halflife=100000,
-  use_cmudict=True,  # Use CMUDict during training to learn pronunciation of ARPAbet phonemes
+  use_cmudict=False,  # Use CMUDict during training to learn pronunciation of ARPAbet phonemes
 
   # Eval:
   max_iters=200,
diff --git a/models/modules.py b/models/modules.py
index c74bf64..eaba17b 100644
--- a/models/modules.py
+++ b/models/modules.py
@@ -8,7 +8,7 @@ def prenet(inputs, is_training, layer_sizes=[256, 128], scope=None):
   with tf.variable_scope(scope or 'prenet'):
     for i, size in enumerate(layer_sizes):
       dense = tf.layers.dense(x, units=size, activation=tf.nn.relu, name='dense_%d' % (i+1))
-      x = tf.layers.dropout(dense, rate=drop_rate, training=True, name='dropout_%d' % (i+1))
+      x = tf.layers.dropout(dense, rate=drop_rate, training=is_training, name='dropout_%d' % (i+1))
   return x
 
 
diff --git a/models/tacotron.py b/models/tacotron.py
index eb50589..a5dc288 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -49,11 +49,13 @@ class Tacotron():
 
       # Attention
       attention_cell = AttentionWrapper(
-        DecoderPrenetWrapper(GRUCell(256), is_training),
+        GRUCell(256),
         LocationSensitiveAttention(256, encoder_outputs),
         alignment_history=True,
         output_attention=False)                                                # [N, T_in, 256]
 
+      attention_cell = DecoderPrenetWrapper(attention_cell, is_training)
+
       # Concatenate attention context vector and RNN cell output into a 512D vector.
       concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)            # [N, T_in, 512]
 
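
For reference, below is a minimal sketch (not part of the patch) of how the prenet in models/modules.py reads once this change is applied, assuming TensorFlow 1.x and a dropout rate of 0.5; the rate is defined outside the hunk shown above, so that value is an assumption here. The point of the fix is that dropout is now gated on is_training instead of being hard-coded on.

import tensorflow as tf


def prenet(inputs, is_training, layer_sizes=[256, 128], scope=None):
  """Dense+ReLU layers, each followed by dropout tied to is_training."""
  x = inputs
  drop_rate = 0.5  # assumed value; not shown in the patched hunk
  with tf.variable_scope(scope or 'prenet'):
    for i, size in enumerate(layer_sizes):
      dense = tf.layers.dense(x, units=size, activation=tf.nn.relu,
                              name='dense_%d' % (i + 1))
      # Previously training=True, so dropout stayed active at inference time;
      # it now follows the is_training flag.
      x = tf.layers.dropout(dense, rate=drop_rate, training=is_training,
                            name='dropout_%d' % (i + 1))
  return x

At graph-build time this would be called with is_training=True for training graphs and is_training=False for synthesis, so the dropout layers become no-ops at inference.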