mirror of https://github.com/coqui-ai/TTS.git
Fix the bug in M-AILABS formatter
parent
d653227e59
commit
3df5d9a619
|
@ -125,6 +125,7 @@ class TTSDataset(Dataset):
|
|||
self.d_vector_mapping = d_vector_mapping
|
||||
self.language_id_mapping = language_id_mapping
|
||||
self.use_noise_augment = use_noise_augment
|
||||
|
||||
self.verbose = verbose
|
||||
self.input_seq_computed = False
|
||||
self.rescue_item_idx = 1
|
||||
|
|
|
@ -68,14 +68,19 @@ def mailabs(root_path, meta_files=None):
|
|||
recursively. Defaults to None
|
||||
"""
|
||||
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
|
||||
if meta_files is None:
|
||||
if not meta_files:
|
||||
csv_files = glob(root_path + "/**/metadata.csv", recursive=True)
|
||||
else:
|
||||
csv_files = meta_files
|
||||
|
||||
# meta_files = [f.strip() for f in meta_files.split(",")]
|
||||
items = []
|
||||
for csv_file in csv_files:
|
||||
if os.path.isfile(csv_file):
|
||||
txt_file = csv_file
|
||||
else:
|
||||
txt_file = os.path.join(root_path, csv_file)
|
||||
|
||||
folder = os.path.dirname(txt_file)
|
||||
# determine speaker based on folder structure...
|
||||
speaker_name_match = speaker_regex.search(txt_file)
|
||||
|
@ -90,7 +95,7 @@ def mailabs(root_path, meta_files=None):
|
|||
with open(txt_file, "r", encoding="utf-8") as ttf:
|
||||
for line in ttf:
|
||||
cols = line.split("|")
|
||||
if meta_files is None:
|
||||
if not meta_files:
|
||||
wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
|
||||
else:
|
||||
wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
|
||||
|
@ -98,7 +103,8 @@ def mailabs(root_path, meta_files=None):
|
|||
text = cols[1].strip()
|
||||
items.append([text, wav_file, speaker_name])
|
||||
else:
|
||||
raise RuntimeError("> File %s does not exist!" % (wav_file))
|
||||
# M-AI-Labs have some missing samples, so just print the warning
|
||||
print("> File %s does not exist!" % (wav_file))
|
||||
return items
|
||||
|
||||
|
||||
|
@ -214,7 +220,7 @@ def common_voice(root_path, meta_file, ununsed_speakers=None):
|
|||
def libri_tts(root_path, meta_files=None, ununsed_speakers=None):
|
||||
"""https://ai.google/tools/datasets/libri-tts/"""
|
||||
items = []
|
||||
if meta_files is None:
|
||||
if not meta_files:
|
||||
meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
|
||||
else:
|
||||
if isinstance(meta_files, str):
|
||||
|
|
Loading…
Reference in New Issue