TTS/server/synthesizer.py

import io
import os
import librosa
import torch
import scipy
import numpy as np
import soundfile as sf
from utils.text import text_to_sequence
from utils.generic_utils import load_config
from utils.audio import AudioProcessor
from models.tacotron import Tacotron
from matplotlib import pylab as plt


class Synthesizer(object):
    """Thin inference wrapper around a trained Tacotron checkpoint.

    Usage: call `load_model` once to populate `config`, `ap`, `model` and
    `use_cuda`, then call `tts` for every piece of text to synthesize.
    """

    # Number of zero-valued samples appended after each synthesized sentence
    # so consecutive sentences are separated by a short pause.
    SENTENCE_PAUSE_SAMPLES = 10000

    def load_model(self, model_path, model_name, model_config, use_cuda):
        """Build the Tacotron model and restore its weights from disk.

        Args:
            model_path: Directory holding both the checkpoint and the config.
            model_name: Checkpoint file name, relative to `model_path`.
            model_config: Config file name, relative to `model_path`.
            use_cuda: When true, the checkpoint is loaded with its stored
                device placement and the model is moved to the GPU;
                otherwise every tensor is remapped onto the CPU.
        """
        model_config = os.path.join(model_path, model_config)
        self.model_file = os.path.join(model_path, model_name)
        print(" > Loading model ...")
        print(" | > model config: ", model_config)
        print(" | > model file: ", self.model_file)
        config = load_config(model_config)
        self.config = config
        self.use_cuda = use_cuda
        self.ap = AudioProcessor(**config.audio)
        self.model = Tacotron(config.embedding_size, self.ap.num_freq,
                              self.ap.num_mels, config.r)
        # load model state
        if use_cuda:
            cp = torch.load(self.model_file)
        else:
            # Returning `storage` unchanged keeps every tensor on the CPU,
            # so GPU-trained checkpoints load on CPU-only machines.
            cp = torch.load(
                self.model_file, map_location=lambda storage, loc: storage)
        # The checkpoint is a dict; the weights live under the 'model' key.
        self.model.load_state_dict(cp['model'])
        if use_cuda:
            self.model.cuda()
        # Inference only: switch the model to evaluation mode.
        self.model.eval()

    def save_wav(self, wav, path):
        """Write `wav` to `path` by delegating to the audio processor.

        Args:
            wav: Waveform samples to store.
            path: Destination handed to `self.ap.save_wav`; `tts` passes an
                in-memory `io.BytesIO` buffer here.
        """
        self.ap.save_wav(wav, path)

    @staticmethod
    def _split_sentences(text):
        """Split `text` on '.' and return the sentences worth synthesizing.

        Fragments shorter than 3 characters (checked before stripping) and
        fragments that contain only whitespace are dropped. Every kept
        fragment is stripped and gets its terminating '.' back.
        """
        if not text:
            return []
        sentences = []
        for fragment in text.split('.'):
            if len(fragment) < 3:
                continue
            fragment = fragment.strip()
            if not fragment:
                # Only whitespace: would otherwise become a bare "." input.
                continue
            sentences.append(fragment + '.')
        return sentences

    def tts(self, text):
        """Synthesize `text` sentence by sentence into a single WAV buffer.

        Each sentence is run through the model on its own, converted to a
        waveform and followed by `SENTENCE_PAUSE_SAMPLES` samples of silence;
        all pieces are joined and written to one buffer.

        Args:
            text: Input text; sentences are delimited by '.'.

        Returns:
            io.BytesIO: Buffer that `save_wav` wrote the joined audio into.
            The stream position is left wherever the writer left it.

        Raises:
            ValueError: If `text` holds no sentence of 3 or more characters,
                so there is nothing to synthesize.
        """
        sentences = self._split_sentences(text)
        if not sentences:
            raise ValueError(
                "tts() needs at least one sentence of 3 or more characters.")

        text_cleaner = [self.config.text_cleaner]
        wavs = []
        # Gradients are never needed at inference time; skipping their
        # bookkeeping saves memory on long inputs.
        with torch.no_grad():
            for sen in sentences:
                print(sen)
                # Encode the current sentence only, not the whole input text.
                seq = np.array(text_to_sequence(sen, text_cleaner))
                # Add a leading batch dimension of size 1.
                chars_var = torch.from_numpy(seq).unsqueeze(0).long()
                if self.use_cuda:
                    chars_var = chars_var.cuda()
                # Only the second output (linear spectrogram) is used here.
                _, linear_out, _, _ = self.model.forward(chars_var)
                linear_out = linear_out[0].data.cpu().numpy()
                # assumes inv_spectrogram returns a 1-D sample array, which
                # np.concatenate below relies on -- TODO confirm
                wavs.append(self.ap.inv_spectrogram(linear_out.T))
                wavs.append(np.zeros(self.SENTENCE_PAUSE_SAMPLES))

        # Write every sentence (plus pauses) once, not just the last one.
        out = io.BytesIO()
        self.save_wav(np.concatenate(wavs), out)
        return out
Server component added 2018-06-05 12:15:48 +00:00			`import io`
			`import os`
			`import librosa`
			`import torch`
Remove the noise in the code 2018-06-05 14:15:57 +00:00			`import scipy`
Server component added 2018-06-05 12:15:48 +00:00			`import numpy as np`
Remove the noise in the code 2018-06-05 14:15:57 +00:00			`import soundfile as sf`
Update toy server for the recent updates 2018-11-19 14:27:22 +00:00			`from utils.text import text_to_sequence`
			`from utils.generic_utils import load_config`
			`from utils.audio import AudioProcessor`
			`from models.tacotron import Tacotron`
Server component added 2018-06-05 12:15:48 +00:00			`from matplotlib import pylab as plt`


			`class Synthesizer(object):`
			`def load_model(self, model_path, model_name, model_config, use_cuda):`
			`model_config = os.path.join(model_path, model_config)`
pep8 format all 2018-08-02 14:34:17 +00:00			`self.model_file = os.path.join(model_path, model_name)`
Server component added 2018-06-05 12:15:48 +00:00			`print(" > Loading model ...")`
			`print(" \| > model config: ", model_config)`
			`print(" \| > model file: ", self.model_file)`
			`config = load_config(model_config)`
			`self.config = config`
			`self.use_cuda = use_cuda`
Update toy server for the recent updates 2018-11-19 14:27:22 +00:00			`self.ap = AudioProcessor(**config.audio)`
			`self.model = Tacotron(config.embedding_size, self.ap.num_freq, self.ap.num_mels, config.r)`
Server component added 2018-06-05 12:15:48 +00:00			`# load model state`
			`if use_cuda:`
			`cp = torch.load(self.model_file)`
			`else:`
pep8 format all 2018-08-02 14:34:17 +00:00			`cp = torch.load(`
			`self.model_file, map_location=lambda storage, loc: storage)`
Server component added 2018-06-05 12:15:48 +00:00			`# load the model`
			`self.model.load_state_dict(cp['model'])`
			`if use_cuda:`
			`self.model.cuda()`
pep8 format all 2018-08-02 14:34:17 +00:00			`self.model.eval()`

Server component added 2018-06-05 12:15:48 +00:00			`def save_wav(self, wav, path):`
Update toy server for the recent updates 2018-11-19 14:27:22 +00:00			`# wav *= 32767 / max(1e-8, np.max(np.abs(wav)))`
			`self.ap.save_wav(wav, path)`
Server component added 2018-06-05 12:15:48 +00:00
			`def tts(self, text):`
			`text_cleaner = [self.config.text_cleaner]`
			`wavs = []`
			`for sen in text.split('.'):`
			`if len(sen) < 3:`
			`continue`
model path changes for server and string strip 2018-06-06 14:30:45 +00:00			`sen = sen.strip()`
pep8 format all 2018-08-02 14:34:17 +00:00			`sen += '.'`
Remove the noise in the code 2018-06-05 14:15:57 +00:00			`print(sen)`
Server component added 2018-06-05 12:15:48 +00:00			`sen = sen.strip()`
			`seq = np.array(text_to_sequence(text, text_cleaner))`
pep8 format all 2018-08-02 14:34:17 +00:00			`chars_var = torch.from_numpy(seq).unsqueeze(0).long()`
Server component added 2018-06-05 12:15:48 +00:00			`if self.use_cuda:`
			`chars_var = chars_var.cuda()`
pep8 format all 2018-08-02 14:34:17 +00:00			`mel_out, linear_out, alignments, stop_tokens = self.model.forward(`
			`chars_var)`
Server component added 2018-06-05 12:15:48 +00:00			`linear_out = linear_out[0].data.cpu().numpy()`
			`wav = self.ap.inv_spectrogram(linear_out.T)`
			`out = io.BytesIO()`
			`wavs.append(wav)`
			`wavs.append(np.zeros(10000))`
			`self.save_wav(wav, out)`
model path changes for server and string strip 2018-06-06 14:30:45 +00:00			`return out`