mirror of https://github.com/coqui-ai/TTS.git
update mozilla preprocessor
parent
70eabaf4d8
commit
278c7a91b7
|
@@ -41,29 +41,18 @@ def tweb(root_path, meta_file):
|
||||||
# return {'text': texts, 'wavs': wavs}
|
# return {'text': texts, 'wavs': wavs}
|
||||||
|
|
||||||
|
|
||||||
def mozilla(root_path, meta_files):
|
def mozilla(root_path, meta_file):
|
||||||
"""Normalizes Mozilla meta data files to TTS format"""
|
"""Normalizes Mozilla meta data files to TTS format"""
|
||||||
import glob
|
txt_file = os.path.join(root_path, meta_file)
|
||||||
meta_files = glob.glob(root_path + "/**/batch*.txt", recursive=True)
|
|
||||||
folders = [os.path.dirname(f.strip()) for f in meta_files]
|
|
||||||
items = []
|
items = []
|
||||||
for idx, meta_file in enumerate(meta_files):
|
with open(txt_file, 'r') as ttf:
|
||||||
folder = folders[idx]
|
for line in ttf:
|
||||||
# txt_file = os.path.join(root_path, meta_file)
|
cols = line.split('|')
|
||||||
txt_file = meta_file
|
batch_no = int(cols[1].strip().split("_")[0])
|
||||||
with open(txt_file, 'r') as ttf:
|
wav_folder = "batch{}".format(batch_no)
|
||||||
for line in ttf:
|
wav_file = os.path.join(root_path, wav_folder, "wavs_no_processing", cols[1].strip())
|
||||||
cols = line.split('|')
|
text = cols[0].strip()
|
||||||
# wav_file = os.path.join(root_path, folder,
|
items.append([text, wav_file])
|
||||||
# 'wavs_no_processing', cols[1].strip())
|
|
||||||
wav_file = os.path.join(folder, 'wavs_no_processing',
|
|
||||||
cols[1].strip())
|
|
||||||
if os.path.isfile(wav_file):
|
|
||||||
text = cols[0].strip()
|
|
||||||
items.append([text, wav_file])
|
|
||||||
else:
|
|
||||||
print(" > Error: {}".format(wav_file))
|
|
||||||
continue
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue