#-*- coding: utf-8 -*- from network import * from data import inv_spectrogram, find_endpoint, save_wav, spectrogram import numpy as np import argparse import os import sys import io from text import text_to_sequence use_cuda = torch.cuda.is_available() def main(args): # Make model if use_cuda: model = nn.DataParallel(Tacotron().cuda()) # Load checkpoint try: checkpoint = torch.load(os.path.join( hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)) model.load_state_dict(checkpoint['model']) print("\n--------model restored at step %d--------\n" % args.restore_step) except: raise FileNotFoundError("\n------------Model not exists------------\n") # Evaluation model = model.eval() # Make result folder if not exists if not os.path.exists(hp.output_path): os.mkdir(hp.output_path) # Sentences for generation sentences = [ "I try my best to translate text to speech. But I know I need more work", "The new Firefox, Fast for good.", "Technology is continually providing us with new ways to create and publish stories.", "For these stories to achieve their full impact, it requires tool.", "I am allien and I am here to destron your world." ] # Synthesis and save to wav files for i, text in enumerate(sentences): wav = generate(model, text) path = os.path.join(hp.output_path, 'result_%d_%d.wav' % (args.restore_step, i + 1)) with open(path, 'wb') as f: f.write(wav) f.close() print("save wav file at step %d ..." % (i + 1)) def generate(model, text): # Text to index sequence cleaner_names = [x.strip() for x in hp.cleaners.split(',')] seq = np.expand_dims(np.asarray(text_to_sequence( text, cleaner_names), dtype=np.int32), axis=0) # Provide [GO] Frame mel_input = np.zeros([seq.shape[0], hp.num_mels, 1], dtype=np.float32) # Variables characters = Variable(torch.from_numpy(seq).type( torch.cuda.LongTensor), volatile=True).cuda() mel_input = Variable(torch.from_numpy(mel_input).type( torch.cuda.FloatTensor), volatile=True).cuda() # Spectrogram to wav _, linear_output = model.forward(characters, mel_input) wav = inv_spectrogram(linear_output[0].data.cpu().numpy()) wav = wav[:find_endpoint(wav)] out = io.BytesIO() save_wav(wav, out) return out.getvalue() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--restore_step', type=int, help='Global step to restore checkpoint', default=0) parser.add_argument('--batch_size', type=int, help='Batch size', default=1) args = parser.parse_args() main(args)