From 78c3897599d415d9312f57d61fde715feacef595 Mon Sep 17 00:00:00 2001 From: Thomas Werkmeister Date: Tue, 23 Jul 2019 09:47:52 +0200 Subject: [PATCH 1/3] root path speaker matching added data root path in speaker matching for mailabs, this way you don't need to start at the very bottom of the folder hierarchy if you want to explicitly define metafiles. --- datasets/preprocess.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datasets/preprocess.py b/datasets/preprocess.py index 9dd7a610..a3701c4d 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -82,14 +82,14 @@ def mailabs(root_path, meta_files=None): # meta_files = [f.strip() for f in meta_files.split(",")] items = [] for idx, csv_file in enumerate(csv_files): + txt_file = os.path.join(root_path, csv_file) # determine speaker based on folder structure... - speaker_name_match = speaker_regex.search(csv_file) + speaker_name_match = speaker_regex.search(txt_file) if speaker_name_match is None: continue speaker_name = speaker_name_match.group("speaker_name") print(" | > {}".format(csv_file)) folder = folders[idx] - txt_file = os.path.join(root_path, csv_file) with open(txt_file, 'r') as ttf: for line in ttf: cols = line.split('|') From 4c9fbeeaf81c2df8461fe5f35225ae9ecd0728a9 Mon Sep 17 00:00:00 2001 From: Thomas Werkmeister Date: Tue, 23 Jul 2019 14:23:36 +0200 Subject: [PATCH 2/3] simplified folder variable --- datasets/preprocess.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datasets/preprocess.py b/datasets/preprocess.py index a3701c4d..a86f8e5d 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -75,21 +75,19 @@ def mailabs(root_path, meta_files=None): speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/") if meta_files is None: csv_files = glob(root_path+"/**/metadata.csv", recursive=True) - folders = [os.path.dirname(f) for f in csv_files] else: csv_files = meta_files - folders = [f.strip().split("by_book")[1][1:] for f 
in csv_files] # meta_files = [f.strip() for f in meta_files.split(",")] items = [] for idx, csv_file in enumerate(csv_files): txt_file = os.path.join(root_path, csv_file) + folder = os.path.dirname(txt_file) # determine speaker based on folder structure... speaker_name_match = speaker_regex.search(txt_file) if speaker_name_match is None: continue speaker_name = speaker_name_match.group("speaker_name") print(" | > {}".format(csv_file)) - folder = folders[idx] with open(txt_file, 'r') as ttf: for line in ttf: cols = line.split('|') From d4045fd47b1dc5939d6100c9f8a2faf3863fc1fc Mon Sep 17 00:00:00 2001 From: Thomas Werkmeister Date: Tue, 23 Jul 2019 14:30:06 +0200 Subject: [PATCH 3/3] unused var --- datasets/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/preprocess.py b/datasets/preprocess.py index a86f8e5d..e5f4e1a2 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -79,7 +79,7 @@ def mailabs(root_path, meta_files=None): csv_files = meta_files # meta_files = [f.strip() for f in meta_files.split(",")] items = [] - for idx, csv_file in enumerate(csv_files): + for csv_file in csv_files: txt_file = os.path.join(root_path, csv_file) folder = os.path.dirname(txt_file) # determine speaker based on folder structure...