From 3df5d9a619d3c860452944acbd3edb524923da98 Mon Sep 17 00:00:00 2001
From: Edresson <edresson1@gmail.com>
Date: Mon, 23 Aug 2021 16:12:31 -0300
Subject: [PATCH] Fix the bug in M-AILABS formatter

---
 TTS/tts/datasets/dataset.py    |  1 +
 TTS/tts/datasets/formatters.py | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py
index 7ba97eba..78c6c33d 100644
--- a/TTS/tts/datasets/dataset.py
+++ b/TTS/tts/datasets/dataset.py
@@ -125,6 +125,7 @@ class TTSDataset(Dataset):
         self.d_vector_mapping = d_vector_mapping
         self.language_id_mapping = language_id_mapping
         self.use_noise_augment = use_noise_augment
+
         self.verbose = verbose
         self.input_seq_computed = False
         self.rescue_item_idx = 1
diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
index 51ad892a..651b3197 100644
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -68,14 +68,19 @@ def mailabs(root_path, meta_files=None):
             recursively. Defaults to None
     """
     speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
-    if meta_files is None:
+    if not meta_files:
         csv_files = glob(root_path + "/**/metadata.csv", recursive=True)
     else:
         csv_files = meta_files
+
     # meta_files = [f.strip() for f in meta_files.split(",")]
     items = []
     for csv_file in csv_files:
-        txt_file = os.path.join(root_path, csv_file)
+        if os.path.isfile(csv_file):
+            txt_file = csv_file
+        else:
+            txt_file = os.path.join(root_path, csv_file)
+
         folder = os.path.dirname(txt_file)
         # determine speaker based on folder structure...
         speaker_name_match = speaker_regex.search(txt_file)
@@ -90,7 +95,7 @@ def mailabs(root_path, meta_files=None):
         with open(txt_file, "r", encoding="utf-8") as ttf:
             for line in ttf:
                 cols = line.split("|")
-                if meta_files is None:
+                if not meta_files:
                     wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
                 else:
                     wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
@@ -98,7 +103,8 @@ def mailabs(root_path, meta_files=None):
                     text = cols[1].strip()
                     items.append([text, wav_file, speaker_name])
                 else:
-                    raise RuntimeError("> File %s does not exist!" % (wav_file))
+                    # M-AILABS has some missing samples, so print a warning instead of raising
+                    print("> File %s does not exist!" % (wav_file))
     return items
 
 
@@ -214,7 +220,7 @@ def common_voice(root_path, meta_file, ununsed_speakers=None):
 def libri_tts(root_path, meta_files=None, ununsed_speakers=None):
     """https://ai.google/tools/datasets/libri-tts/"""
     items = []
-    if meta_files is None:
+    if not meta_files:
         meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
     else:
         if isinstance(meta_files, str):