linter updates

pull/3/head
Eren Gölge 2021-02-05 13:10:43 +00:00
parent 8871c111d2
commit d49757faaa
17 changed files with 122 additions and 116 deletions

View File

@ -16,6 +16,7 @@ from TTS.utils.io import load_config
if __name__ == '__main__': if __name__ == '__main__':
# pylint: disable=bad-continuation
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='''Extract attention masks from trained Tacotron/Tacotron2 models. description='''Extract attention masks from trained Tacotron/Tacotron2 models.
These masks can be used for different purposes including training a TTS model with a Duration Predictor.\n\n''' These masks can be used for different purposes including training a TTS model with a Duration Predictor.\n\n'''

View File

@ -179,7 +179,6 @@ def main():
# load models # load models
synthesizer = Synthesizer(model_path, config_path, vocoder_path, vocoder_config_path, args.use_cuda) synthesizer = Synthesizer(model_path, config_path, vocoder_path, vocoder_config_path, args.use_cuda)
use_griffin_lim = vocoder_path is None
print(" > Text: {}".format(args.text)) print(" > Text: {}".format(args.text))
# # handle multi-speaker setting # # handle multi-speaker setting

View File

@ -34,7 +34,9 @@ print(" > Using CUDA: ", use_cuda)
print(" > Number of GPUs: ", num_gpus) print(" > Number of GPUs: ", num_gpus)
def setup_loader(ap: AudioProcessor, is_val: bool=False, verbose: bool=False): def setup_loader(ap: AudioProcessor,
is_val: bool = False,
verbose: bool = False):
if is_val: if is_val:
loader = None loader = None
else: else:
@ -254,8 +256,7 @@ if __name__ == '__main__':
if args.restore_path: if args.restore_path:
new_fields["restore_path"] = args.restore_path new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch() new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, OUT_PATH, copy_model_files(c, args.config_path, OUT_PATH, new_fields)
new_fields)
LOG_DIR = OUT_PATH LOG_DIR = OUT_PATH
tb_logger = TensorboardLogger(LOG_DIR, model_name='Speaker_Encoder') tb_logger = TensorboardLogger(LOG_DIR, model_name='Speaker_Encoder')

View File

@ -119,7 +119,7 @@ def format_data(data):
avg_text_length, avg_spec_length, attn_mask, item_idx avg_text_length, avg_spec_length, attn_mask, item_idx
def data_depended_init(data_loader, model, ap): def data_depended_init(data_loader, model):
"""Data depended initialization for activation normalization.""" """Data depended initialization for activation normalization."""
if hasattr(model, 'module'): if hasattr(model, 'module'):
for f in model.module.decoder.flows: for f in model.module.decoder.flows:
@ -138,7 +138,7 @@ def data_depended_init(data_loader, model, ap):
# format data # format data
text_input, text_lengths, mel_input, mel_lengths, spekaer_embed,\ text_input, text_lengths, mel_input, mel_lengths, spekaer_embed,\
_, _, attn_mask, item_idx = format_data(data) _, _, attn_mask, _ = format_data(data)
# forward pass model # forward pass model
_ = model.forward( _ = model.forward(
@ -177,7 +177,7 @@ def train(data_loader, model, criterion, optimizer, scheduler,
# format data # format data
text_input, text_lengths, mel_input, mel_lengths, speaker_c,\ text_input, text_lengths, mel_input, mel_lengths, speaker_c,\
avg_text_length, avg_spec_length, attn_mask, item_idx = format_data(data) avg_text_length, avg_spec_length, attn_mask, _ = format_data(data)
loader_time = time.time() - end_time loader_time = time.time() - end_time
@ -332,7 +332,7 @@ def evaluate(data_loader, model, criterion, ap, global_step, epoch):
# format data # format data
text_input, text_lengths, mel_input, mel_lengths, speaker_c,\ text_input, text_lengths, mel_input, mel_lengths, speaker_c,\
_, _, attn_mask, item_idx = format_data(data) _, _, attn_mask, _ = format_data(data)
# forward pass model # forward pass model
z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward( z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward(
@ -550,13 +550,14 @@ def main(args): # pylint: disable=redefined-outer-name
eval_loader = setup_loader(ap, 1, is_val=True, verbose=True) eval_loader = setup_loader(ap, 1, is_val=True, verbose=True)
global_step = args.restore_step global_step = args.restore_step
model = data_depended_init(train_loader, model, ap) model = data_depended_init(train_loader, model)
for epoch in range(0, c.epochs): for epoch in range(0, c.epochs):
c_logger.print_epoch_start(epoch, c.epochs) c_logger.print_epoch_start(epoch, c.epochs)
train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer, train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer,
scheduler, ap, global_step, scheduler, ap, global_step,
epoch) epoch)
eval_avg_loss_dict = evaluate(eval_loader , model, criterion, ap, global_step, epoch) eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap,
global_step, epoch)
c_logger.print_epoch_end(epoch, eval_avg_loss_dict) c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
target_loss = train_avg_loss_dict['avg_loss'] target_loss = train_avg_loss_dict['avg_loss']
if c.run_eval: if c.run_eval:
@ -632,8 +633,7 @@ if __name__ == '__main__':
if args.restore_path: if args.restore_path:
new_fields["restore_path"] = args.restore_path new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch() new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, copy_model_files(c, args.config_path, OUT_PATH, new_fields)
OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775) os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775) os.chmod(OUT_PATH, 0o775)

View File

@ -518,7 +518,8 @@ def main(args): # pylint: disable=redefined-outer-name
train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer, train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer,
scheduler, ap, global_step, scheduler, ap, global_step,
epoch) epoch)
eval_avg_loss_dict = evaluate(eval_loader , model, criterion, ap, global_step, epoch) eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap,
global_step, epoch)
c_logger.print_epoch_end(epoch, eval_avg_loss_dict) c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
target_loss = train_avg_loss_dict['avg_loss'] target_loss = train_avg_loss_dict['avg_loss']
if c.run_eval: if c.run_eval:

View File

@ -180,8 +180,8 @@ def train(data_loader, model, criterion, optimizer, optimizer_st, scheduler,
loss_dict = criterion(postnet_output, decoder_output, mel_input, loss_dict = criterion(postnet_output, decoder_output, mel_input,
linear_input, stop_tokens, stop_targets, linear_input, stop_tokens, stop_targets,
mel_lengths, decoder_backward_output, mel_lengths, decoder_backward_output,
alignments, alignment_lengths, alignments_backward, alignments, alignment_lengths,
text_lengths) alignments_backward, text_lengths)
# check nan loss # check nan loss
if torch.isnan(loss_dict['loss']).any(): if torch.isnan(loss_dict['loss']).any():
@ -535,7 +535,6 @@ def main(args): # pylint: disable=redefined-outer-name
# setup criterion # setup criterion
criterion = TacotronLoss(c, stopnet_pos_weight=c.stopnet_pos_weight, ga_sigma=0.4) criterion = TacotronLoss(c, stopnet_pos_weight=c.stopnet_pos_weight, ga_sigma=0.4)
if args.restore_path: if args.restore_path:
checkpoint = torch.load(args.restore_path, map_location='cpu') checkpoint = torch.load(args.restore_path, map_location='cpu')
try: try:
@ -706,8 +705,7 @@ if __name__ == '__main__':
if args.restore_path: if args.restore_path:
new_fields["restore_path"] = args.restore_path new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch() new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, copy_model_files(c, args.config_path, OUT_PATH, new_fields)
OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775) os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775) os.chmod(OUT_PATH, 0o775)

View File

@ -33,9 +33,8 @@ use_cuda, num_gpus = setup_torch_training_env(True, True)
def setup_loader(ap, is_val=False, verbose=False): def setup_loader(ap, is_val=False, verbose=False):
if is_val and not c.run_eval:
loader = None loader = None
else: if not is_val or c.run_eval:
dataset = GANDataset(ap=ap, dataset = GANDataset(ap=ap,
items=eval_data if is_val else train_data, items=eval_data if is_val else train_data,
seq_len=c.seq_len, seq_len=c.seq_len,
@ -639,8 +638,7 @@ if __name__ == '__main__':
if args.restore_path: if args.restore_path:
new_fields["restore_path"] = args.restore_path new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch() new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, copy_model_files(c, args.config_path, OUT_PATH, new_fields)
OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775) os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775) os.chmod(OUT_PATH, 0o775)

View File

@ -54,7 +54,6 @@ def setup_loader(ap, is_val=False, verbose=False):
if is_val else c.num_loader_workers, if is_val else c.num_loader_workers,
pin_memory=False) pin_memory=False)
return loader return loader
@ -79,8 +78,8 @@ def format_test_data(data):
return m, x return m, x
def train(model, criterion, optimizer, def train(model, criterion, optimizer, scheduler, scaler, ap, global_step,
scheduler, scaler, ap, global_step, epoch): epoch):
data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0)) data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0))
model.train() model.train()
epoch_time = 0 epoch_time = 0
@ -94,7 +93,8 @@ def train(model, criterion, optimizer,
c_logger.print_train_start() c_logger.print_train_start()
# setup noise schedule # setup noise schedule
noise_schedule = c['train_noise_schedule'] noise_schedule = c['train_noise_schedule']
betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'], noise_schedule['num_steps']) betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'],
noise_schedule['num_steps'])
if hasattr(model, 'module'): if hasattr(model, 'module'):
model.module.compute_noise_level(betas) model.module.compute_noise_level(betas)
else: else:
@ -205,7 +205,8 @@ def train(model, criterion, optimizer,
epoch, epoch,
OUT_PATH, OUT_PATH,
model_losses=loss_dict, model_losses=loss_dict,
scaler=scaler.state_dict() if c.mixed_precision else None) scaler=scaler.state_dict()
if c.mixed_precision else None)
end_time = time.time() end_time = time.time()
@ -246,7 +247,6 @@ def evaluate(model, criterion, ap, global_step, epoch):
else: else:
noise, x_noisy, noise_scale = model.compute_y_n(x) noise, x_noisy, noise_scale = model.compute_y_n(x)
# forward pass # forward pass
noise_hat = model(x_noisy, m, noise_scale) noise_hat = model(x_noisy, m, noise_scale)
@ -254,7 +254,6 @@ def evaluate(model, criterion, ap, global_step, epoch):
loss = criterion(noise, noise_hat) loss = criterion(noise, noise_hat)
loss_wavegrad_dict = {'wavegrad_loss': loss} loss_wavegrad_dict = {'wavegrad_loss': loss}
loss_dict = dict() loss_dict = dict()
for key, value in loss_wavegrad_dict.items(): for key, value in loss_wavegrad_dict.items():
if isinstance(value, (int, float)): if isinstance(value, (int, float)):
@ -284,7 +283,9 @@ def evaluate(model, criterion, ap, global_step, epoch):
# setup noise schedule and inference # setup noise schedule and inference
noise_schedule = c['test_noise_schedule'] noise_schedule = c['test_noise_schedule']
betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'], noise_schedule['num_steps']) betas = np.linspace(noise_schedule['min_val'],
noise_schedule['max_val'],
noise_schedule['num_steps'])
if hasattr(model, 'module'): if hasattr(model, 'module'):
model.module.compute_noise_level(betas) model.module.compute_noise_level(betas)
# compute voice # compute voice
@ -315,7 +316,8 @@ def main(args): # pylint: disable=redefined-outer-name
print(f" > Loading wavs from: {c.data_path}") print(f" > Loading wavs from: {c.data_path}")
if c.feature_path is not None: if c.feature_path is not None:
print(f" > Loading features from: {c.feature_path}") print(f" > Loading features from: {c.feature_path}")
eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path, c.eval_split_size) eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path,
c.eval_split_size)
else: else:
eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size) eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size)
@ -395,14 +397,13 @@ def main(args): # pylint: disable=redefined-outer-name
global_step = args.restore_step global_step = args.restore_step
for epoch in range(0, c.epochs): for epoch in range(0, c.epochs):
c_logger.print_epoch_start(epoch, c.epochs) c_logger.print_epoch_start(epoch, c.epochs)
_, global_step = train(model, criterion, optimizer, _, global_step = train(model, criterion, optimizer, scheduler, scaler,
scheduler, scaler, ap, global_step, ap, global_step, epoch)
epoch) eval_avg_loss_dict = evaluate(model, criterion, ap, global_step, epoch)
eval_avg_loss_dict = evaluate(model, criterion, ap,
global_step, epoch)
c_logger.print_epoch_end(epoch, eval_avg_loss_dict) c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
target_loss = eval_avg_loss_dict[c.target_loss] target_loss = eval_avg_loss_dict[c.target_loss]
best_loss = save_best_model(target_loss, best_loss = save_best_model(
target_loss,
best_loss, best_loss,
model, model,
optimizer, optimizer,
@ -486,8 +487,7 @@ if __name__ == '__main__':
if args.restore_path: if args.restore_path:
new_fields["restore_path"] = args.restore_path new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch() new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, copy_model_files(c, args.config_path, OUT_PATH, new_fields)
OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775) os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775) os.chmod(OUT_PATH, 0o775)

View File

@ -200,12 +200,9 @@ def train(model, optimizer, criterion, scheduler, scaler, ap, global_step, epoch
train_data[rand_idx], (tuple, list)) else train_data[rand_idx][0] train_data[rand_idx], (tuple, list)) else train_data[rand_idx][0]
wav = ap.load_wav(wav_path) wav = ap.load_wav(wav_path)
ground_mel = ap.melspectrogram(wav) ground_mel = ap.melspectrogram(wav)
sample_wav = model.inference(ground_mel, sample_wav = model.inference(ground_mel, c.batched,
c.batched, c.target_samples, c.overlap_samples,
c.target_samples, use_cuda)
c.overlap_samples,
use_cuda
)
predict_mel = ap.melspectrogram(sample_wav) predict_mel = ap.melspectrogram(sample_wav)
# compute spectrograms # compute spectrograms
@ -287,12 +284,8 @@ def evaluate(model, criterion, ap, global_step, epoch):
eval_data[rand_idx], (tuple, list)) else eval_data[rand_idx][0] eval_data[rand_idx], (tuple, list)) else eval_data[rand_idx][0]
wav = ap.load_wav(wav_path) wav = ap.load_wav(wav_path)
ground_mel = ap.melspectrogram(wav) ground_mel = ap.melspectrogram(wav)
sample_wav = model.inference(ground_mel, sample_wav = model.inference(ground_mel, c.batched, c.target_samples,
c.batched, c.overlap_samples, use_cuda)
c.target_samples,
c.overlap_samples,
use_cuda
)
predict_mel = ap.melspectrogram(sample_wav) predict_mel = ap.melspectrogram(sample_wav)
# Sample audio # Sample audio

View File

@ -87,5 +87,3 @@ for base in tqdm(cartesian_product(base_values, repeat=args.num_iter), total=tot
best_schedule = {'beta': beta} best_schedule = {'beta': beta}
print(f" > Found a better schedule. - MSE: {mse.item()}") print(f" > Found a better schedule. - MSE: {mse.item()}")
np.save(args.output_path, best_schedule) np.save(args.output_path, best_schedule)

View File

@ -1,10 +1,9 @@
import numpy
import numpy as np
import queue import queue
import torch
import random import random
import numpy as np
import torch
from torch.utils.data import Dataset from torch.utils.data import Dataset
from tqdm import tqdm
class MyDataset(Dataset): class MyDataset(Dataset):
@ -155,7 +154,7 @@ class MyDataset(Dataset):
# add random gaussian noise # add random gaussian noise
if self.additive_noise > 0: if self.additive_noise > 0:
noises_ = [numpy.random.normal(0, self.additive_noise, size=len(w)) for w in wavs_] noises_ = [np.random.normal(0, self.additive_noise, size=len(w)) for w in wavs_]
wavs_ = [wavs_[i] + noises_[i] for i in range(len(wavs_))] wavs_ = [wavs_[i] + noises_[i] for i in range(len(wavs_))]
# get a random subset of each of the wavs and convert to MFCC. # get a random subset of each of the wavs and convert to MFCC.

View File

@ -114,4 +114,3 @@ def check_config_speaker_encoder(c):
check_argument('path', dataset_entry, restricted=True, val_type=str) check_argument('path', dataset_entry, restricted=True, val_type=str)
check_argument('meta_file_train', dataset_entry, restricted=True, val_type=[str, list]) check_argument('meta_file_train', dataset_entry, restricted=True, val_type=[str, list])
check_argument('meta_file_val', dataset_entry, restricted=True, val_type=str) check_argument('meta_file_val', dataset_entry, restricted=True, val_type=str)

View File

@ -90,7 +90,8 @@ class MyDataset(Dataset):
return data return data
@staticmethod @staticmethod
def _generate_and_cache_phoneme_sequence(text, cache_path, cleaners, language, tp, add_blank): def _generate_and_cache_phoneme_sequence(text, cache_path, cleaners,
language, tp, add_blank):
"""generate a phoneme sequence from text. """generate a phoneme sequence from text.
since the usage is for subsequent caching, we never add bos and since the usage is for subsequent caching, we never add bos and
eos chars here. Instead we add those dynamically later; based on the eos chars here. Instead we add those dynamically later; based on the
@ -98,13 +99,16 @@ class MyDataset(Dataset):
phonemes = phoneme_to_sequence(text, [cleaners], phonemes = phoneme_to_sequence(text, [cleaners],
language=language, language=language,
enable_eos_bos=False, enable_eos_bos=False,
tp=tp, add_blank=add_blank) tp=tp,
add_blank=add_blank)
phonemes = np.asarray(phonemes, dtype=np.int32) phonemes = np.asarray(phonemes, dtype=np.int32)
np.save(cache_path, phonemes) np.save(cache_path, phonemes)
return phonemes return phonemes
@staticmethod @staticmethod
def _load_or_generate_phoneme_sequence(wav_file, text, phoneme_cache_path, enable_eos_bos, cleaners, language, tp, add_blank): def _load_or_generate_phoneme_sequence(wav_file, text, phoneme_cache_path,
enable_eos_bos, cleaners, language,
tp, add_blank):
file_name = os.path.splitext(os.path.basename(wav_file))[0] file_name = os.path.splitext(os.path.basename(wav_file))[0]
# different names for normal phonemes and with blank chars. # different names for normal phonemes and with blank chars.
@ -143,11 +147,15 @@ class MyDataset(Dataset):
if not self.input_seq_computed: if not self.input_seq_computed:
if self.use_phonemes: if self.use_phonemes:
text = self._load_or_generate_phoneme_sequence(wav_file, text, self.phoneme_cache_path, self.enable_eos_bos, self.cleaners, self.phoneme_language, self.tp, self.add_blank) text = self._load_or_generate_phoneme_sequence(
wav_file, text, self.phoneme_cache_path,
self.enable_eos_bos, self.cleaners, self.phoneme_language,
self.tp, self.add_blank)
else: else:
text = np.asarray(text_to_sequence(text, [self.cleaners], text = np.asarray(text_to_sequence(text, [self.cleaners],
tp=self.tp, add_blank=self.add_blank), tp=self.tp,
add_blank=self.add_blank),
dtype=np.int32) dtype=np.int32)
assert text.size > 0, self.items[idx][1] assert text.size > 0, self.items[idx][1]
@ -177,7 +185,8 @@ class MyDataset(Dataset):
item = args[0] item = args[0]
func_args = args[1] func_args = args[1]
text, wav_file, *_ = item text, wav_file, *_ = item
phonemes = MyDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args) phonemes = MyDataset._load_or_generate_phoneme_sequence(
wav_file, text, *func_args)
return phonemes return phonemes
def compute_input_seq(self, num_workers=0): def compute_input_seq(self, num_workers=0):
@ -188,13 +197,18 @@ class MyDataset(Dataset):
print(" | > Computing input sequences ...") print(" | > Computing input sequences ...")
for idx, item in enumerate(tqdm.tqdm(self.items)): for idx, item in enumerate(tqdm.tqdm(self.items)):
text, *_ = item text, *_ = item
sequence = np.asarray(text_to_sequence(text, [self.cleaners], sequence = np.asarray(text_to_sequence(
tp=self.tp, add_blank=self.add_blank), text, [self.cleaners],
tp=self.tp,
add_blank=self.add_blank),
dtype=np.int32) dtype=np.int32)
self.items[idx][0] = sequence self.items[idx][0] = sequence
else: else:
func_args = [self.phoneme_cache_path, self.enable_eos_bos, self.cleaners, self.phoneme_language, self.tp, self.add_blank] func_args = [
self.phoneme_cache_path, self.enable_eos_bos, self.cleaners,
self.phoneme_language, self.tp, self.add_blank
]
if self.verbose: if self.verbose:
print(" | > Computing phonemes ...") print(" | > Computing phonemes ...")
if num_workers == 0: if num_workers == 0:
@ -203,7 +217,11 @@ class MyDataset(Dataset):
self.items[idx][0] = phonemes self.items[idx][0] = phonemes
else: else:
with Pool(num_workers) as p: with Pool(num_workers) as p:
phonemes = list(tqdm.tqdm(p.imap(MyDataset._phoneme_worker, [[item, func_args] for item in self.items]), total=len(self.items))) phonemes = list(
tqdm.tqdm(p.imap(MyDataset._phoneme_worker,
[[item, func_args]
for item in self.items]),
total=len(self.items)))
for idx, p in enumerate(phonemes): for idx, p in enumerate(phonemes):
self.items[idx][0] = p self.items[idx][0] = p

View File

@ -6,8 +6,6 @@ import subprocess
import sys import sys
from pathlib import Path from pathlib import Path
import torch
def get_git_branch(): def get_git_branch():
try: try:

View File

@ -54,6 +54,9 @@
"mulaw": false, // apply mulaw if mode is bits "mulaw": false, // apply mulaw if mode is bits
"padding": 2, // pad the input for resnet to see wider input length "padding": 2, // pad the input for resnet to see wider input length
// GENERATOR - for backward compatibility
"generator_model": "WaveRNN",
// DATASET // DATASET
//"use_gta": true, // use computed gta features from the tts model //"use_gta": true, // use computed gta features from the tts model
"data_path": "tests/data/ljspeech/wavs/", // path containing training wav files "data_path": "tests/data/ljspeech/wavs/", // path containing training wav files