diff --git a/hparams.py b/hparams.py
index 3e627c8..80488ed 100644
--- a/hparams.py
+++ b/hparams.py
@@ -26,8 +26,8 @@ hparams = tf.contrib.training.HParams(
   batch_size=32,
   adam_beta1=0.9,
   adam_beta2=0.999,
-  initial_learning_rate=0.002,
-  decay_learning_rate=True,
+  initial_learning_rate=0.0015,
+  learning_rate_decay_halflife=100000,
   use_cmudict=False,  # Use CMUDict during training to learn pronunciation of ARPAbet phonemes

   # Eval:
diff --git a/models/tacotron.py b/models/tacotron.py
index e97cc77..eb50589 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -127,10 +127,8 @@ class Tacotron():
     '''
     with tf.variable_scope('optimizer') as scope:
       hp = self._hparams
-      if hp.decay_learning_rate:
-        self.learning_rate = _learning_rate_decay(hp.initial_learning_rate, global_step)
-      else:
-        self.learning_rate = tf.convert_to_tensor(hp.initial_learning_rate)
+      self.learning_rate = tf.train.exponential_decay(
+        hp.initial_learning_rate, global_step, hp.learning_rate_decay_halflife, 0.5)
       optimizer = tf.train.AdamOptimizer(self.learning_rate, hp.adam_beta1, hp.adam_beta2)
       gradients, variables = zip(*optimizer.compute_gradients(self.loss))
       self.gradients = gradients
@@ -141,10 +139,3 @@ class Tacotron():
       with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
         self.optimize = optimizer.apply_gradients(zip(clipped_gradients, variables),
           global_step=global_step)
-
-
-def _learning_rate_decay(init_lr, global_step):
-  # Noam scheme from tensor2tensor:
-  warmup_steps = 4000.0
-  step = tf.cast(global_step + 1, dtype=tf.float32)
-  return init_lr * warmup_steps**0.5 * tf.minimum(step * warmup_steps**-1.5, step**-0.5)
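
For reference, the replacement schedule is plain exponential decay: with decay_rate=0.5 and decay_steps=hp.learning_rate_decay_halflife, tf.train.exponential_decay (staircase=False, the default) evaluates to initial_learning_rate * 0.5 ** (global_step / halflife), so the learning rate halves every 100k steps instead of following the Noam warmup curve. A minimal sketch of the values this produces, in plain Python outside the graph; the helper name below is illustrative and not part of the codebase:

# Sketch of the value computed by tf.train.exponential_decay with the
# hyperparameters in this diff (staircase=False, the default):
#   lr(step) = initial_learning_rate * 0.5 ** (step / halflife)
# decayed_learning_rate is a hypothetical helper for illustration only.
def decayed_learning_rate(step, initial_learning_rate=0.0015, halflife=100000):
  return initial_learning_rate * 0.5 ** (step / halflife)

for step in (0, 50000, 100000, 200000, 400000):
  print(step, decayed_learning_rate(step))
# 0      -> 0.0015
# 50000  -> ~0.00106
# 100000 -> 0.00075
# 200000 -> 0.000375
# 400000 -> 0.00009375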