diff --git a/datasets/preprocess.py b/datasets/preprocess.py index bf1c8d97..9dd7a610 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -72,7 +72,7 @@ def mozilla(root_path, meta_file): def mailabs(root_path, meta_files=None): """Normalizes M-AI-Labs meta data files to TTS format""" - speaker_regex = re.compile("by_book/(male|female|mix)/(?P<speaker_name>[^/]+)/") + speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/") if meta_files is None: csv_files = glob(root_path+"/**/metadata.csv", recursive=True) folders = [os.path.dirname(f) for f in csv_files] @@ -83,7 +83,10 @@ def mailabs(root_path, meta_files=None): items = [] for idx, csv_file in enumerate(csv_files): # determine speaker based on folder structure... - speaker_name = speaker_regex.search(csv_file).group("speaker_name") + speaker_name_match = speaker_regex.search(csv_file) + if speaker_name_match is None: + continue + speaker_name = speaker_name_match.group("speaker_name") print(" | > {}".format(csv_file)) folder = folders[idx] txt_file = os.path.join(root_path, csv_file)