import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell


def prenet(inputs, is_training, layer_sizes=[256, 128], scope=None):
  # Prenet: stack of dense + ReLU layers with dropout, applied before the CBHG.
  x = inputs
  drop_rate = 0.5 if is_training else 0.0
  with tf.variable_scope(scope or 'prenet'):
    for i, size in enumerate(layer_sizes):
      dense = tf.layers.dense(x, units=size, activation=tf.nn.relu, name='dense_%d' % (i + 1))
      x = tf.layers.dropout(dense, rate=drop_rate, training=is_training, name='dropout_%d' % (i + 1))
  return x


def encoder_cbhg(inputs, input_lengths, is_training):
  return cbhg(
    inputs,
    input_lengths,
    is_training,
    scope='encoder_cbhg',
    K=16,
    projections=[128, 128])


def post_cbhg(inputs, input_dim, is_training):
  return cbhg(
    inputs,
    None,
    is_training,
    scope='post_cbhg',
    K=8,
    projections=[256, input_dim])


def cbhg(inputs, input_lengths, is_training, scope, K, projections):
  # CBHG: 1-D convolution bank + highway network + bidirectional GRU.
  with tf.variable_scope(scope):
    with tf.variable_scope('conv_bank'):
      # Convolution bank: concatenate on the last axis to stack channels from all convolutions
      conv_outputs = tf.concat(
        [conv1d(inputs, k, 128, tf.nn.relu, is_training, 'conv1d_%d' % k) for k in range(1, K + 1)],
        axis=-1)

    # Maxpooling:
    maxpool_output = tf.layers.max_pooling1d(
      conv_outputs,
      pool_size=2,
      strides=1,
      padding='same')

    # Two projection layers:
    proj1_output = conv1d(maxpool_output, 3, projections[0], tf.nn.relu, is_training, 'proj_1')
    proj2_output = conv1d(proj1_output, 3, projections[1], None, is_training, 'proj_2')

    # Residual connection:
    highway_input = proj2_output + inputs

    # Handle dimensionality mismatch:
    if highway_input.shape[2] != 128:
      highway_input = tf.layers.dense(highway_input, 128)

    # 4-layer HighwayNet:
    for i in range(4):
      highway_input = highwaynet(highway_input, 'highway_%d' % (i + 1))
    rnn_input = highway_input

    # Bidirectional RNN:
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
      GRUCell(128),
      GRUCell(128),
      rnn_input,
      sequence_length=input_lengths,
      dtype=tf.float32)
    return tf.concat(outputs, axis=2)  # Concat forward and backward outputs


def highwaynet(inputs, scope):
  # Highway layer: gated combination of a transform path (H) and the identity path.
  with tf.variable_scope(scope):
    H = tf.layers.dense(
      inputs,
      units=128,
      activation=tf.nn.relu,
      name='H')
    T = tf.layers.dense(
      inputs,
      units=128,
      activation=tf.nn.sigmoid,
      name='T',
      bias_initializer=tf.constant_initializer(-1.0))  # Bias the gate toward passing the input through
    return H * T + inputs * (1.0 - T)


def conv1d(inputs, kernel_size, channels, activation, is_training, scope):
  # 1-D convolution followed by batch normalization.
  with tf.variable_scope(scope):
    conv1d_output = tf.layers.conv1d(
      inputs,
      filters=channels,
      kernel_size=kernel_size,
      activation=activation,
      padding='same')
    return tf.layers.batch_normalization(conv1d_output, training=is_training)
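

# Example usage (a minimal sketch, not part of the original module): wiring the
# prenet into the encoder CBHG as in a Tacotron-style text encoder. The
# placeholder names and shapes below are illustrative assumptions.
if __name__ == '__main__':
  # Hypothetical [batch, time, embed_dim] character embeddings:
  embedded_inputs = tf.placeholder(tf.float32, [None, None, 256], name='embedded_inputs')
  input_lengths = tf.placeholder(tf.int32, [None], name='input_lengths')

  prenet_out = prenet(embedded_inputs, is_training=True)                    # -> [N, T, 128]
  encoder_out = encoder_cbhg(prenet_out, input_lengths, is_training=True)   # -> [N, T, 256]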