TTS/synthesis.py

# -*- coding: utf-8 -*-
# network is expected to expose torch, nn, Variable, Tacotron and the
# hyperparameter module hp used throughout this script
from network import *
from data import inv_spectrogram, find_endpoint, save_wav
import numpy as np
import argparse
import os
import io
from text import text_to_sequence

use_cuda = torch.cuda.is_available()

def main(args):
    # Build the model (DataParallel on GPU, plain module on CPU)
    if use_cuda:
        model = nn.DataParallel(Tacotron().cuda())
    else:
        model = Tacotron()

    # Load checkpoint
    checkpoint_file = os.path.join(
        hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)
    try:
        # map_location keeps loading working when the checkpoint was saved on
        # GPU but synthesis runs on CPU
        checkpoint = torch.load(
            checkpoint_file,
            map_location=None if use_cuda else (lambda storage, loc: storage))
        model.load_state_dict(checkpoint['model'])
        print("\n--------model restored at step %d--------\n" %
              args.restore_step)
    except FileNotFoundError:
        raise FileNotFoundError(
            "\n------------Checkpoint %s does not exist------------\n" % checkpoint_file)

    # Evaluation mode
    model = model.eval()

    # Make the result folder if it does not exist
    if not os.path.exists(hp.output_path):
        os.mkdir(hp.output_path)
    # Sentences for generation
    sentences = [
        "I try my best to translate text to speech. But I know I need more work.",
        "The new Firefox. Fast for good.",
        "Technology is continually providing us with new ways to create and publish stories.",
        "For these stories to achieve their full impact, it requires tools.",
        "I am an alien and I am here to destroy your world.",
    ]

    # Synthesize each sentence and save it to a wav file
    for i, text in enumerate(sentences):
        wav = generate(model, text)
        path = os.path.join(hp.output_path, 'result_%d_%d.wav' %
                            (args.restore_step, i + 1))
        with open(path, 'wb') as f:
            f.write(wav)
        print("saved wav file %d of %d to %s" % (i + 1, len(sentences), path))


def generate(model, text):
    """Synthesize a single sentence and return the wav data as bytes."""
    # Text to index sequence
    cleaner_names = [x.strip() for x in hp.cleaners.split(',')]
    seq = np.expand_dims(np.asarray(text_to_sequence(
        text, cleaner_names), dtype=np.int32), axis=0)

    # Provide the [GO] frame that starts the decoder
    mel_input = np.zeros([seq.shape[0], hp.num_mels, 1], dtype=np.float32)

    # Wrap inputs as inference-only Variables (volatile avoids building the graph)
    characters = Variable(torch.from_numpy(seq).long(), volatile=True)
    mel_input = Variable(torch.from_numpy(mel_input).float(), volatile=True)
    if use_cuda:
        characters = characters.cuda()
        mel_input = mel_input.cuda()

    # Invert the predicted linear spectrogram to a waveform
    _, linear_output = model(characters, mel_input)
    wav = inv_spectrogram(linear_output[0].data.cpu().numpy())
    wav = wav[:find_endpoint(wav)]

    # Write the waveform into an in-memory WAV file
    out = io.BytesIO()
    save_wav(wav, out)
    return out.getvalue()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--restore_step', type=int,
                        help='Global step to restore checkpoint', default=0)
    parser.add_argument('--batch_size', type=int, help='Batch size', default=1)
    args = parser.parse_args()
    main(args)
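
# Example usage (assuming a checkpoint named checkpoint_100000.pth.tar exists
# under hp.checkpoint_path, as saved by the training script):
#
#   python synthesis.py --restore_step 100000
#
# The synthesized sentences are written to hp.output_path as
# result_100000_1.wav, result_100000_2.wav, and so on.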