diff --git a/datasets/preprocess.py b/datasets/preprocess.py
index 9dd7a610..e5f4e1a2 100644
--- a/datasets/preprocess.py
+++ b/datasets/preprocess.py
@@ -75,21 +75,19 @@ def mailabs(root_path, meta_files=None):
     speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
     if meta_files is None:
         csv_files = glob(root_path+"/**/metadata.csv", recursive=True)
-        folders = [os.path.dirname(f) for f in csv_files]
     else:
         csv_files = meta_files
-        folders = [f.strip().split("by_book")[1][1:] for f in csv_files]
     # meta_files = [f.strip() for f in meta_files.split(",")]
     items = []
-    for idx, csv_file in enumerate(csv_files):
+    for csv_file in csv_files:
+        txt_file = os.path.join(root_path, csv_file)
+        folder = os.path.dirname(txt_file)
         # determine speaker based on folder structure...
-        speaker_name_match = speaker_regex.search(csv_file)
+        speaker_name_match = speaker_regex.search(txt_file)
         if speaker_name_match is None:
             continue
         speaker_name = speaker_name_match.group("speaker_name")
         print(" | > {}".format(csv_file))
-        folder = folders[idx]
-        txt_file = os.path.join(root_path, csv_file)
         with open(txt_file, 'r') as ttf:
             for line in ttf:
                 cols = line.split('|')