mirror of https://github.com/MycroftAI/mimic2.git
Use vanilla exponential decay for learning rate
parent 1084310ed1
commit 70ba4aaf69
@@ -26,8 +26,8 @@ hparams = tf.contrib.training.HParams(
   batch_size=32,
   adam_beta1=0.9,
   adam_beta2=0.999,
-  initial_learning_rate=0.002,
-  decay_learning_rate=True,
+  initial_learning_rate=0.0015,
+  learning_rate_decay_halflife=100000,
   use_cmudict=False,  # Use CMUDict during training to learn pronunciation of ARPAbet phonemes
 
   # Eval:
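The renamed hyperparameter spells out the schedule: with initial_learning_rate=0.0015 and learning_rate_decay_halflife=100000, the learning rate is meant to halve every 100k steps. A minimal pure-Python sketch of that closed form (the helper name and sample steps are illustrative, not part of the repo):

```python
# Closed form implied by the new hparams: lr(step) = initial_lr * 0.5 ** (step / halflife).
# Hypothetical helper for illustration only.
def decayed_lr(step, initial_lr=0.0015, halflife=100000):
    return initial_lr * 0.5 ** (step / halflife)

for step in (0, 50000, 100000, 200000, 400000):
    print(step, round(decayed_lr(step), 6))
# 0 -> 0.0015, 100000 -> 0.00075, 200000 -> 0.000375, ...
```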
@@ -127,10 +127,8 @@ class Tacotron():
     '''
     with tf.variable_scope('optimizer') as scope:
       hp = self._hparams
-      if hp.decay_learning_rate:
-        self.learning_rate = _learning_rate_decay(hp.initial_learning_rate, global_step)
-      else:
-        self.learning_rate = tf.convert_to_tensor(hp.initial_learning_rate)
+      self.learning_rate = tf.train.exponential_decay(
+        hp.initial_learning_rate, global_step, hp.learning_rate_decay_halflife, 0.5)
       optimizer = tf.train.AdamOptimizer(self.learning_rate, hp.adam_beta1, hp.adam_beta2)
       gradients, variables = zip(*optimizer.compute_gradients(self.loss))
       self.gradients = gradients
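The model now always builds the schedule with tf.train.exponential_decay(lr, global_step, decay_steps, decay_rate), which evaluates to lr * decay_rate ** (global_step / decay_steps). Below is a self-contained TF1-style sketch of the same optimizer wiring in isolation, assuming the tf.compat.v1 shim so it runs under TF 2.x; the toy variable, loss, and clip norm are stand-ins rather than the repo's values:

```python
# Sketch of the optimizer block: exponential decay feeding Adam, with gradient
# clipping and the UPDATE_OPS dependency. Assumes TF 2.x with the compat.v1 shim;
# the variable, loss, and clip norm below are illustrative only.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

global_step = tf.train.get_or_create_global_step()
w = tf.get_variable('w', initializer=1.0)
loss = tf.square(w - 3.0)                      # stand-in for the Tacotron loss

# Vanilla exponential decay: rate halves every 100k steps (decay_rate=0.5).
learning_rate = tf.train.exponential_decay(0.0015, global_step, 100000, 0.5)
optimizer = tf.train.AdamOptimizer(learning_rate, 0.9, 0.999)

gradients, variables = zip(*optimizer.compute_gradients(loss))
clipped_gradients, _ = tf.clip_by_global_norm(gradients, 1.0)   # clip norm is illustrative
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    optimize = optimizer.apply_gradients(zip(clipped_gradients, variables),
                                         global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(optimize)
    print(sess.run([global_step, learning_rate]))   # step 1, lr still ~0.0015
```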
@@ -141,10 +139,3 @@ class Tacotron():
       with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
         self.optimize = optimizer.apply_gradients(zip(clipped_gradients, variables),
           global_step=global_step)
-
-
-def _learning_rate_decay(init_lr, global_step):
-  # Noam scheme from tensor2tensor:
-  warmup_steps = 4000.0
-  step = tf.cast(global_step + 1, dtype=tf.float32)
-  return init_lr * warmup_steps**0.5 * tf.minimum(step * warmup_steps**-1.5, step**-0.5)
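The deleted _learning_rate_decay was the Noam warmup-then-inverse-square-root schedule from tensor2tensor. A quick sketch contrasting it with the new exponential half-life decay, using the formulas from the removed function and the new tf.train.exponential_decay call (the defaults mirror the old 0.002 and new 0.0015 initial rates; the sample steps are arbitrary):

```python
# Compare the removed Noam schedule with the new exponential half-life decay.
# Formulas come from the deleted _learning_rate_decay and the new
# tf.train.exponential_decay call; everything else here is illustrative.

def noam_lr(step, init_lr=0.002, warmup_steps=4000.0):
    step = float(step + 1)
    return init_lr * warmup_steps ** 0.5 * min(step * warmup_steps ** -1.5, step ** -0.5)

def exp_lr(step, init_lr=0.0015, halflife=100000):
    return init_lr * 0.5 ** (step / halflife)

for step in (1000, 4000, 50000, 200000):
    print(f'{step:>7d}  noam={noam_lr(step):.6f}  exp={exp_lr(step):.6f}')
```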