diff --git a/config.json b/config.json index 1b873b98..89a30a6b 100644 --- a/config.json +++ b/config.json @@ -25,5 +25,5 @@ "text_cleaner": "english_cleaners", "data_path": "/data/shared/KeithIto/LJSpeech-1.0", - "output_path": "./result" + "output_path": "result" } diff --git a/datasets/.LJSpeech.py.swp b/datasets/.LJSpeech.py.swp index b80a29a9..c909a602 100644 Binary files a/datasets/.LJSpeech.py.swp and b/datasets/.LJSpeech.py.swp differ diff --git a/datasets/LJSpeech.py b/datasets/LJSpeech.py index 7f9aca36..6202c570 100644 --- a/datasets/LJSpeech.py +++ b/datasets/LJSpeech.py @@ -4,7 +4,6 @@ import numpy as np import collections from torch.utils.data import Dataset -import train_config as c from Tacotron.utils.text import text_to_sequence from Tacotron.utils.audio import * from Tacotron.utils.data import prepare_data, pad_data, pad_per_step @@ -12,16 +11,19 @@ from Tacotron.utils.data import prepare_data, pad_data, pad_per_step class LJSpeechDataset(Dataset): - def __init__(self, csv_file, root_dir, outputs_per_step): + def __init__(self, csv_file, root_dir, outputs_per_step, sample_rate, + cleaners): self.frames = pd.read_csv(csv_file, sep='|', header=None) self.root_dir = root_dir self.outputs_per_step = outputs_per_step + self.sample_rate = sample_rate + self.cleaners = cleaners print(" > Reading LJSpeech from - {}".format(root_dir)) print(" | > Number of instances : {}".format(len(self.frames))) def load_wav(self, filename): try: - audio = librosa.load(filename, sr=c.sample_rate) + audio = librosa.load(filename, sr=self.sample_rate) return audio except RuntimeError as e: print(" !! Cannot read file : {}".format(filename)) @@ -33,7 +35,7 @@ class LJSpeechDataset(Dataset): wav_name = os.path.join(self.root_dir, self.frames.ix[idx, 0]) + '.wav' text = self.frames.ix[idx, 1] - text = np.asarray(text_to_sequence(text, [c.cleaners]), dtype=np.int32) + text = np.asarray(text_to_sequence(text, [self.cleaners]), dtype=np.int32) wav = np.asarray(self.load_wav(wav_name)[0], dtype=np.float32) sample = {'text': text, 'wav': wav} return sample diff --git a/train.py b/train.py index 6ca1f1fe..69545300 100644 --- a/train.py +++ b/train.py @@ -42,7 +42,9 @@ def main(args): dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata.csv'), os.path.join(c.data_path, 'wavs'), - c.r + c.r, + c.sample_rate, + c.text_cleaner ) model = Tacotron(c.embedding_size, diff --git a/utils/.generic_utils.py.swp b/utils/.generic_utils.py.swp index 4b46ae8a..a1e05635 100644 Binary files a/utils/.generic_utils.py.swp and b/utils/.generic_utils.py.swp differ diff --git a/utils/audio.py b/utils/audio.py index 1d6b24f7..49a5bc34 100644 --- a/utils/audio.py +++ b/utils/audio.py @@ -1,8 +1,6 @@ import librosa import numpy as np from scipy import signal -import Tacotron.train_config as c - _mel_basis = None diff --git a/utils/generic_utils.py b/utils/generic_utils.py index e5fc0cb4..4a10b9a2 100644 --- a/utils/generic_utils.py +++ b/utils/generic_utils.py @@ -33,7 +33,7 @@ def remove_experiment_folder(experiment_path): """Check folder if there is a checkpoint, otherwise remove the folder""" checkpoint_files = glob.glob(experiment_path+"/*.pth.tar") - if len(checkpoint_files) < 2: + if len(checkpoint_files) < 1: shutil.rmtree(experiment_path) print(" ! Run is removed from {}".format(experiment_path)) else: