mimic2/models/modules.py

import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell


def prenet(inputs, is_training, layer_sizes=[256, 128], scope=None):
  x = inputs
  drop_rate = 0.5 if is_training else 0.0
  with tf.variable_scope(scope or 'prenet'):
    for i, size in enumerate(layer_sizes):
      dense = tf.layers.dense(x, units=size, activation=tf.nn.relu, name='dense_%d' % (i+1))
      # training=True is deliberate: drop_rate is already 0.0 at inference,
      # so the dropout layer becomes a no-op there.
      x = tf.layers.dropout(dense, rate=drop_rate, training=True, name='dropout_%d' % (i+1))
  return x


def encoder_cbhg(inputs, input_lengths, is_training):
  return cbhg(
    inputs,
    input_lengths,
    is_training,
    scope='encoder_cbhg',
    K=16,
    projections=[128, 128])


def post_cbhg(inputs, input_dim, is_training):
  return cbhg(
    inputs,
    None,
    is_training,
    scope='post_cbhg',
    K=8,
    projections=[256, input_dim])


def cbhg(inputs, input_lengths, is_training, scope, K, projections):
  with tf.variable_scope(scope):
    with tf.variable_scope('conv_bank'):
      # Convolution bank: concatenate on the last axis to stack channels from all convolutions
      conv_outputs = tf.concat(
        [conv1d(inputs, k, 128, tf.nn.relu, is_training, 'conv1d_%d' % k) for k in range(1, K+1)],
        axis=-1
      )

    # Maxpooling:
    maxpool_output = tf.layers.max_pooling1d(
      conv_outputs,
      pool_size=2,
      strides=1,
      padding='same')

    # Two projection layers:
    proj1_output = conv1d(maxpool_output, 3, projections[0], tf.nn.relu, is_training, 'proj_1')
    proj2_output = conv1d(proj1_output, 3, projections[1], None, is_training, 'proj_2')

    # Residual connection:
    highway_input = proj2_output + inputs

    # Handle dimensionality mismatch:
    if highway_input.shape[2] != 128:
      highway_input = tf.layers.dense(highway_input, 128)

    # 4-layer HighwayNet:
    for i in range(4):
      highway_input = highwaynet(highway_input, 'highway_%d' % (i+1))
    rnn_input = highway_input

    # Bidirectional RNN
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
      GRUCell(128),
      GRUCell(128),
      rnn_input,
      sequence_length=input_lengths,
      dtype=tf.float32)
    return tf.concat(outputs, axis=2)  # Concat forward and backward


def highwaynet(inputs, scope):
  with tf.variable_scope(scope):
    H = tf.layers.dense(
      inputs,
      units=128,
      activation=tf.nn.relu,
      name='H')
    # Transform gate T; the -1.0 bias init keeps T small at first,
    # biasing the network toward carrying the input through unchanged.
    T = tf.layers.dense(
      inputs,
      units=128,
      activation=tf.nn.sigmoid,
      name='T',
      bias_initializer=tf.constant_initializer(-1.0))
    return H * T + inputs * (1.0 - T)


def conv1d(inputs, kernel_size, channels, activation, is_training, scope):
  with tf.variable_scope(scope):
    conv1d_output = tf.layers.conv1d(
      inputs,
      filters=channels,
      kernel_size=kernel_size,
      activation=activation,
      padding='same')
    return tf.layers.batch_normalization(conv1d_output, training=is_training)
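

# --- Usage sketch (illustration only; not part of the original module) ---
# A minimal, hypothetical example of how these modules compose in a
# Tacotron-style model: the encoder runs a prenet followed by the encoder
# CBHG, and the post-net CBHG refines decoder mel outputs before the final
# linear projection. The tensor shapes and the 80-bin mel depth below are
# assumptions made for illustration, not values read from this repo's
# hyperparameters.
def _example_usage(char_embeddings, input_lengths, mel_outputs, is_training):
  # char_embeddings: [batch, max_time, embed_depth] embedded character inputs
  prenet_out = prenet(char_embeddings, is_training)                   # -> [batch, max_time, 128]
  encoder_out = encoder_cbhg(prenet_out, input_lengths, is_training)  # -> [batch, max_time, 256]

  # mel_outputs: [batch, max_frames, 80] decoder outputs (80 mel bins assumed)
  post_out = post_cbhg(mel_outputs, 80, is_training)                  # -> [batch, max_frames, 256]
  return encoder_out, post_out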