Added Docker files; return the alignment when synthesizing

pull/15/head
Michael Nguyen 2018-07-05 13:13:55 -05:00
parent 3924367392
commit 45584e8a93
4 changed files with 65 additions and 24 deletions

8
cpu.Dockerfile Normal file
View File

@ -0,0 +1,8 @@
# CPU image for mimic2: TensorFlow 1.5.0 base image plus the project's
# Python requirements, with the repository copied to /root/mimic2.
# NOTE(review): the un-suffixed 1.5.0 tag is presumably the Python 2 build of
# the image; if the project expects Python 3, a "-py3" tag may be needed — confirm.
FROM tensorflow/tensorflow:1.5.0
RUN mkdir /root/mimic2
# Copy the entire build context into the image.
COPY . /root/mimic2
WORKDIR /root/mimic2
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
# Containers start a bash process; nothing is run automatically.
ENTRYPOINT [ "/bin/bash" ]

42
eval.py
View File

@ -8,15 +8,16 @@ from util import plot
sentences = [
# From July 8, 2017 New York Times:
# 'Scientists at the CERN laboratory say they have discovered a new particle.',
# 'Theres a way to measure the acute emotional intelligence that has never gone out of style.',
# 'President Trump met with other leaders at the Group of 20 conference.',
# 'The Senate\'s bill to repeal and replace the Affordable Care Act is now imperiled.',
# # From Google's Tacotron example page:
# 'Generative adversarial network or variational auto-encoder.',
# 'The buses aren\'t the problem, they actually provide a solution.',
# 'Does the quick brown fox jump over the lazy dog?',
# 'Talib Kweli confirmed to AllHipHop that he will be releasing an album in the next year.',
'Scientists at the CERN laboratory say they have discovered a new particle.',
'Theres a way to measure the acute emotional intelligence that has never gone out of style.',
'President Trump met with other leaders at the Group of 20 conference.',
'The Senate\'s bill to repeal and replace the Affordable Care Act is now imperiled.',
# From Google's Tacotron example page:
'Generative adversarial network or variational auto-encoder.',
'The buses aren\'t the problem, they actually provide a solution.',
'Does the quick brown fox jump over the lazy dog?',
'Talib Kweli confirmed to AllHipHop that he will be releasing an album in the next year.',
# From mycroft
"It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
"Be a voice, not an echo.",
"The human voice is the most perfect instrument of all.",
@ -57,15 +58,26 @@ def run_eval(args):
def _parse_flag(value):
    """Convert a command-line string into a boolean.

    The usual "false" spellings (and the empty string) map to False. Every
    other value maps to True, so values that previously enabled the option
    keep enabling it.
    """
    return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


def main():
    """Parse command-line options, configure the environment, and run the eval.

    Options:
        --checkpoint: required path to the model checkpoint.
        --hparams: comma-separated ``name=value`` hyperparameter overrides.
        --force_cpu: hide all GPUs from the process. May be given bare
            (``--force_cpu``) or with an explicit value
            (``--force_cpu true`` / ``--force_cpu false``).
        --gpu_assignment: value written to ``CUDA_VISIBLE_DEVICES`` when the
            CPU is not forced.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--checkpoint', required=True,
        help='Path to model checkpoint')
    parser.add_argument(
        '--hparams', default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    # Without a type, "--force_cpu False" produced the truthy string 'False'
    # and still forced the CPU. nargs='?' with const=True also lets the option
    # be used as a bare switch.
    parser.add_argument(
        '--force_cpu', type=_parse_flag, nargs='?', const=True, default=False,
        help='Force synthesize with cpu')
    parser.add_argument(
        '--gpu_assignment', default='0',
        help='Set the gpu the model should run on')
    args = parser.parse_args()

    if args.force_cpu:
        # An empty device list hides every GPU from the process.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_assignment
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    hparams.parse(args.hparams)
    run_eval(args)

8
gpu.Dockerfile Normal file
View File

@ -0,0 +1,8 @@
# GPU image for mimic2: TensorFlow 1.5.0 GPU base image plus the project's
# Python requirements, with the repository copied to /root/mimic2.
# NOTE(review): the "1.5.0-gpu" tag is presumably the Python 2 build of the
# image; if the project expects Python 3, a "-gpu-py3" tag may be needed — confirm.
FROM tensorflow/tensorflow:1.5.0-gpu
RUN mkdir /root/mimic2
# Copy the entire build context into the image.
COPY . /root/mimic2
WORKDIR /root/mimic2
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
# Containers start a bash process; nothing is run automatically.
ENTRYPOINT [ "/bin/bash" ]

View File

@ -1,13 +1,18 @@
import io
import math
import numpy as np
import tensorflow as tf
from hparams import hparams
from hparams import hparams, hparams_debug_string
from librosa import effects
from models import create_model
from text import text_to_sequence
from util import audio
def find_alignment_endpoint(alignment_shape, ratio):
    """Return how many entries along the alignment's second axis to keep.

    Args:
        alignment_shape: Shape of the alignment array; only index 1 is read.
        ratio: Fraction of the second axis to keep (the caller passes the
            kept-audio fraction ``audio_endpoint / len(wav)``).

    Returns:
        int: ``alignment_shape[1] * ratio`` rounded up to a whole number.
    """
    # The result is used as a slice bound, so make sure it is an int even on
    # interpreters where math.ceil returns a float.
    return int(math.ceil(alignment_shape[1] * ratio))
class Synthesizer:
def load(self, checkpoint_path, model_name='tacotron'):
print('Constructing model: %s' % model_name)
@ -16,7 +21,8 @@ class Synthesizer:
with tf.variable_scope('model') as scope:
self.model = create_model(model_name, hparams)
self.model.initialize(inputs, input_lengths)
self.wav_output = audio.inv_spectrogram_tensorflow(self.model.linear_outputs[0])
self.wav_output = audio.inv_spectrogram_tensorflow(
self.model.linear_outputs[0])
self.alignment = self.model.alignments[0]
print('Loading checkpoint: %s' % checkpoint_path)
@ -25,19 +31,26 @@ class Synthesizer:
saver = tf.train.Saver()
saver.restore(self.session, checkpoint_path)
def synthesize(self, text):
    """Synthesize ``text`` and return the audio together with its alignment.

    Args:
        text: Input handed to ``text_to_sequence`` along with the cleaner
            names listed in ``hparams.cleaners``.

    Returns:
        tuple: ``(wav_bytes, alignment)`` where ``wav_bytes`` is what
        ``audio.save_wav`` wrote for the waveform cut at
        ``audio.find_endpoint``, and ``alignment`` is the model alignment
        with its second axis cut by the same proportion.
    """
    cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    seq = text_to_sequence(text, cleaner_names)
    feed_dict = {
        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
    }
    wav, alignment = self.session.run(
        [self.wav_output, self.alignment],
        feed_dict=feed_dict,
    )

    audio_endpoint = audio.find_endpoint(wav)
    total_samples = len(wav)
    if total_samples:
        # Explicit float division: integer division would collapse the
        # ratio to 0 or 1.
        kept_ratio = float(audio_endpoint) / total_samples
    else:
        # Nothing to cut from an empty waveform; leave the alignment whole
        # instead of dividing by zero.
        kept_ratio = 1.0
    alignment_endpoint = find_alignment_endpoint(alignment.shape, kept_ratio)

    wav = wav[:audio_endpoint]
    alignment = alignment[:, :alignment_endpoint]

    out = io.BytesIO()
    audio.save_wav(wav, out)
    return out.getvalue(), alignment