mirror of https://github.com/MycroftAI/mimic2.git
added docker files, and return alignment during synthesizing
parent
3924367392
commit
45584e8a93
|
@ -0,0 +1,8 @@
|
|||
# CPU image for running mimic2.
# NOTE(review): the un-suffixed 1.5.0 tag appears to be the Python 2 build of
# the TensorFlow image; switch to 1.5.0-py3 if the project needs Python 3 — confirm.
FROM tensorflow/tensorflow:1.5.0

# WORKDIR creates the directory if it does not exist, so no separate mkdir is needed.
WORKDIR /root/mimic2

# Install dependencies before copying the sources so that this layer stays
# cached when only project code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

ENTRYPOINT [ "/bin/bash" ]
|
42
eval.py
42
eval.py
|
@ -8,15 +8,16 @@ from util import plot
|
|||
|
||||
sentences = [
|
||||
# From July 8, 2017 New York Times:
|
||||
# 'Scientists at the CERN laboratory say they have discovered a new particle.',
|
||||
# 'There’s a way to measure the acute emotional intelligence that has never gone out of style.',
|
||||
# 'President Trump met with other leaders at the Group of 20 conference.',
|
||||
# 'The Senate\'s bill to repeal and replace the Affordable Care Act is now imperiled.',
|
||||
# # From Google's Tacotron example page:
|
||||
# 'Generative adversarial network or variational auto-encoder.',
|
||||
# 'The buses aren\'t the problem, they actually provide a solution.',
|
||||
# 'Does the quick brown fox jump over the lazy dog?',
|
||||
# 'Talib Kweli confirmed to AllHipHop that he will be releasing an album in the next year.',
|
||||
'Scientists at the CERN laboratory say they have discovered a new particle.',
|
||||
'There’s a way to measure the acute emotional intelligence that has never gone out of style.',
|
||||
'President Trump met with other leaders at the Group of 20 conference.',
|
||||
'The Senate\'s bill to repeal and replace the Affordable Care Act is now imperiled.',
|
||||
# From Google's Tacotron example page:
|
||||
'Generative adversarial network or variational auto-encoder.',
|
||||
'The buses aren\'t the problem, they actually provide a solution.',
|
||||
'Does the quick brown fox jump over the lazy dog?',
|
||||
'Talib Kweli confirmed to AllHipHop that he will be releasing an album in the next year.',
|
||||
# From mycroft
|
||||
"It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
|
||||
"Be a voice, not an echo.",
|
||||
"The human voice is the most perfect instrument of all.",
|
||||
|
@ -57,15 +58,26 @@ def run_eval(args):
|
|||
|
||||
def _parse_bool_flag(value):
    """Interpret a command-line string as a boolean.

    An empty string, '0', 'false', 'f', 'no', 'n' and 'off' (case-insensitive)
    are treated as False; every other value is treated as True, so invocations
    that previously enabled the flag keep doing so.
    """
    return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


def main():
    """Parse command-line options, set up the process environment and run evaluation.

    Options:
      --checkpoint      Path to the model checkpoint (required).
      --hparams         Comma-separated name=value hyperparameter overrides.
      --force_cpu       Hide all GPUs from the process. May be given bare
                        (``--force_cpu``) or with an explicit value
                        (``--force_cpu true`` / ``--force_cpu false``).
      --gpu_assignment  Value written to CUDA_VISIBLE_DEVICES when the CPU is
                        not forced.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--checkpoint', required=True,
        help='Path to model checkpoint')
    parser.add_argument(
        '--hparams', default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    # Without a type, any supplied value (including the string 'False') was
    # truthy. Parse it explicitly, and allow the bare flag form as well.
    parser.add_argument(
        '--force_cpu', nargs='?', const=True, default=False,
        type=_parse_bool_flag,
        help='Force synthesize with cpu')
    parser.add_argument(
        '--gpu_assignment', default='0',
        help='Set the gpu the model should run on')

    args = parser.parse_args()

    # An empty CUDA_VISIBLE_DEVICES hides every GPU; otherwise expose the
    # requested device(s).
    if args.force_cpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_assignment

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse(args.hparams)
    run_eval(args)
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
# GPU image for running mimic2.
# NOTE(review): the 1.5.0-gpu tag appears to be the Python 2 build of the
# TensorFlow GPU image; switch to 1.5.0-gpu-py3 if the project needs Python 3 — confirm.
FROM tensorflow/tensorflow:1.5.0-gpu

# WORKDIR creates the directory if it does not exist, so no separate mkdir is needed.
WORKDIR /root/mimic2

# Install dependencies before copying the sources so that this layer stays
# cached when only project code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

ENTRYPOINT [ "/bin/bash" ]
|
|
@ -1,13 +1,18 @@
|
|||
import io
|
||||
import math
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from hparams import hparams
|
||||
from hparams import hparams, hparams_debug_string
|
||||
from librosa import effects
|
||||
from models import create_model
|
||||
from text import text_to_sequence
|
||||
from util import audio
|
||||
|
||||
|
||||
def find_alignment_endpoint(alignment_shape, ratio):
    """Scale the second alignment dimension by ``ratio`` and round up.

    Args:
      alignment_shape: Shape of the alignment array; only index 1 is read.
      ratio: Fraction of that dimension to keep.

    Returns:
      ``ceil(alignment_shape[1] * ratio)`` as an ``int``.
    """
    # math.ceil returns a float on Python 2; coerce so the result can always be
    # used directly as a slice bound.
    return int(math.ceil(alignment_shape[1] * ratio))
|
||||
|
||||
|
||||
class Synthesizer:
|
||||
def load(self, checkpoint_path, model_name='tacotron'):
|
||||
print('Constructing model: %s' % model_name)
|
||||
|
@ -16,7 +21,8 @@ class Synthesizer:
|
|||
with tf.variable_scope('model') as scope:
|
||||
self.model = create_model(model_name, hparams)
|
||||
self.model.initialize(inputs, input_lengths)
|
||||
self.wav_output = audio.inv_spectrogram_tensorflow(self.model.linear_outputs[0])
|
||||
self.wav_output = audio.inv_spectrogram_tensorflow(
|
||||
self.model.linear_outputs[0])
|
||||
self.alignment = self.model.alignments[0]
|
||||
|
||||
print('Loading checkpoint: %s' % checkpoint_path)
|
||||
|
@ -25,19 +31,26 @@ class Synthesizer:
|
|||
saver = tf.train.Saver()
|
||||
saver.restore(self.session, checkpoint_path)
|
||||
|
||||
|
||||
def synthesize(self, text):
    """Synthesize ``text`` and return the audio together with its alignment.

    The text is cleaned with the cleaners named in ``hparams.cleaners``,
    converted to a sequence, and fed through the loaded model. The waveform
    is cut at the endpoint reported by ``audio.find_endpoint`` and the
    alignment is cut along its second axis by the same proportion.

    Args:
      text: The input text to synthesize.

    Returns:
      A tuple ``(wav_bytes, alignment)``: the bytes written by
      ``audio.save_wav`` and the trimmed alignment array.
    """
    cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    seq = text_to_sequence(text, cleaner_names)
    feed_dict = {
        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32)
    }
    wav, alignment = self.session.run(
        [self.wav_output, self.alignment],
        feed_dict=feed_dict
    )

    audio_endpoint = audio.find_endpoint(wav)
    total_samples = len(wav)
    # float() keeps this a true division on Python 2 as well; the guard avoids
    # a ZeroDivisionError when the model produced an empty waveform.
    kept_ratio = float(audio_endpoint) / total_samples if total_samples else 0.0
    alignment_endpoint = find_alignment_endpoint(alignment.shape, kept_ratio)

    wav = wav[:audio_endpoint]
    alignment = alignment[:, :alignment_endpoint]

    out = io.BytesIO()
    audio.save_wav(wav, out)
    return out.getvalue(), alignment
|
||||
|
|
Loading…
Reference in New Issue