Fix the bug in M-AILABS formatter

pull/1032/head
Edresson 2021-08-23 16:12:31 -03:00 committed by Eren Gölge
parent d653227e59
commit 3df5d9a619
2 changed files with 12 additions and 5 deletions

View File

@ -125,6 +125,7 @@ class TTSDataset(Dataset):
self.d_vector_mapping = d_vector_mapping
self.language_id_mapping = language_id_mapping
self.use_noise_augment = use_noise_augment
self.verbose = verbose
self.input_seq_computed = False
self.rescue_item_idx = 1

View File

@ -68,14 +68,19 @@ def mailabs(root_path, meta_files=None):
recursively. Defaults to None
"""
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
if meta_files is None:
if not meta_files:
csv_files = glob(root_path + "/**/metadata.csv", recursive=True)
else:
csv_files = meta_files
# meta_files = [f.strip() for f in meta_files.split(",")]
items = []
for csv_file in csv_files:
txt_file = os.path.join(root_path, csv_file)
if os.path.isfile(csv_file):
txt_file = csv_file
else:
txt_file = os.path.join(root_path, csv_file)
folder = os.path.dirname(txt_file)
# determine speaker based on folder structure...
speaker_name_match = speaker_regex.search(txt_file)
@ -90,7 +95,7 @@ def mailabs(root_path, meta_files=None):
with open(txt_file, "r", encoding="utf-8") as ttf:
for line in ttf:
cols = line.split("|")
if meta_files is None:
if not meta_files:
wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
else:
wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
@ -98,7 +103,8 @@ def mailabs(root_path, meta_files=None):
text = cols[1].strip()
items.append([text, wav_file, speaker_name])
else:
raise RuntimeError("> File %s does not exist!" % (wav_file))
# M-AILABS has some missing samples, so just print a warning
print("> File %s does not exist!" % (wav_file))
return items
@ -214,7 +220,7 @@ def common_voice(root_path, meta_file, ununsed_speakers=None):
def libri_tts(root_path, meta_files=None, ununsed_speakers=None):
"""https://ai.google/tools/datasets/libri-tts/"""
items = []
if meta_files is None:
if not meta_files:
meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
else:
if isinstance(meta_files, str):