Add Dockerfiles, and return the alignment during synthesis

2018-07-05 13:13:55 -05:00 · 2018-07-05 13:13:55 -05:00 · 45584e8a93
parent 3924367392
commit 45584e8a93
4 changed files with 65 additions and 24 deletions
--- a/cpu.Dockerfile
+++ b/cpu.Dockerfile
@ -0,0 +1,8 @@
+FROM tensorflow/tensorflow:1.5.0
+
+RUN mkdir /root/mimic2
+COPY . /root/mimic2
+WORKDIR /root/mimic2
+RUN pip install  --no-cache-dir -r requirements.txt
+
+ENTRYPOINT [ "/bin/bash" ]
--- a/eval.py
+++ b/eval.py
@ -8,15 +8,16 @@ from util import plot

 sentences = [
    # From July 8, 2017 New York Times:
-    # 'Scientists at the CERN laboratory say they have discovered a new particle.',
-    # 'There’s a way to measure the acute emotional intelligence that has never gone out of style.',
-    # 'President Trump met with other leaders at the Group of 20 conference.',
-    # 'The Senate\'s bill to repeal and replace the Affordable Care Act is now imperiled.',
-    # # From Google's Tacotron example page:
-    # 'Generative adversarial network or variational auto-encoder.',
-    # 'The buses aren\'t the problem, they actually provide a solution.',
-    # 'Does the quick brown fox jump over the lazy dog?',
-    # 'Talib Kweli confirmed to AllHipHop that he will be releasing an album in the next year.',
+    'Scientists at the CERN laboratory say they have discovered a new particle.',
+    'There’s a way to measure the acute emotional intelligence that has never gone out of style.',
+    'President Trump met with other leaders at the Group of 20 conference.',
+    'The Senate\'s bill to repeal and replace the Affordable Care Act is now imperiled.',
+    # From Google's Tacotron example page:
+    'Generative adversarial network or variational auto-encoder.',
+    'The buses aren\'t the problem, they actually provide a solution.',
+    'Does the quick brown fox jump over the lazy dog?',
+    'Talib Kweli confirmed to AllHipHop that he will be releasing an album in the next year.',
+    # From Mycroft:
    "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
    "Be a voice, not an echo.",
    "The human voice is the most perfect instrument of all.",
@ -57,15 +58,26 @@ def run_eval(args):

 def main():
  parser = argparse.ArgumentParser()
-  parser.add_argument('--checkpoint', required=True,
-                      help='Path to model checkpoint')
-  parser.add_argument('--hparams', default='',
-                      help='Hyperparameter overrides as a comma-separated list of name=value pairs')
-  parser.add_argument('--force_cpu', default=False,
-                      help='Force synthesize with cpu')
+  parser.add_argument(
+      '--checkpoint', required=True,
+      help='Path to model checkpoint')
+  parser.add_argument(
+      '--hparams', default='',
+      help='Hyperparameter overrides as a comma-separated list of name=value pairs')
+  parser.add_argument(
+      '--force_cpu', default=False,
+      help='Force synthesize with cpu')
+  parser.add_argument(
+      '--gpu_assignment', default='0',
+      help='Set the gpu the model should run on')
+
  args = parser.parse_args()
+
+  os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_assignment
+
  if args.force_cpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
+
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
  hparams.parse(args.hparams)
  run_eval(args)
--- a/gpu.Dockerfile
+++ b/gpu.Dockerfile
@ -0,0 +1,8 @@
+FROM tensorflow/tensorflow:1.5.0-gpu
+
+RUN mkdir /root/mimic2
+COPY . /root/mimic2
+WORKDIR /root/mimic2
+RUN pip install  --no-cache-dir -r requirements.txt
+
+ENTRYPOINT [ "/bin/bash" ]
--- a/synthesizer.py
+++ b/synthesizer.py
@ -1,13 +1,18 @@
 import io
+import math
 import numpy as np
 import tensorflow as tf
-from hparams import hparams
+from hparams import hparams, hparams_debug_string
 from librosa import effects
 from models import create_model
 from text import text_to_sequence
 from util import audio


+def find_alignment_endpoint(alignment_shape, ratio):
+  return math.ceil(alignment_shape[1] * ratio)
+
+
 class Synthesizer:
  def load(self, checkpoint_path, model_name='tacotron'):
    print('Constructing model: %s' % model_name)
@ -16,7 +21,8 @@ class Synthesizer:
    with tf.variable_scope('model') as scope:
      self.model = create_model(model_name, hparams)
      self.model.initialize(inputs, input_lengths)
-      self.wav_output = audio.inv_spectrogram_tensorflow(self.model.linear_outputs[0])
+      self.wav_output = audio.inv_spectrogram_tensorflow(
+          self.model.linear_outputs[0])
      self.alignment = self.model.alignments[0]

    print('Loading checkpoint: %s' % checkpoint_path)
@ -25,19 +31,26 @@ class Synthesizer:
    saver = tf.train.Saver()
    saver.restore(self.session, checkpoint_path)

-
  def synthesize(self, text):
    cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    seq = text_to_sequence(text, cleaner_names)
    feed_dict = {
-      self.model.inputs: [np.asarray(seq, dtype=np.int32)],
-      self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32)
+        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
+        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32)
    }
    wav, alignment = self.session.run(
-      [self.wav_output, self.alignment],
-      feed_dict=feed_dict)
-    
-    wav = wav[:audio.find_endpoint(wav)]
+        [self.wav_output, self.alignment],
+        feed_dict=feed_dict
+    )
+
+    audio_endpoint = audio.find_endpoint(wav)
+    alignment_endpoint = find_alignment_endpoint(
+        alignment.shape, audio_endpoint / len(wav)
+    )
+
+    wav = wav[:audio_endpoint]
+    alignment = alignment[:, :alignment_endpoint]
+
    out = io.BytesIO()
    audio.save_wav(wav, out)
    return out.getvalue(), alignment