mirror of https://github.com/coqui-ai/TTS.git
Update AnalyzeDataset notebook
parent
1f0c8179da
commit
ec4b03c045
|
@ -8,7 +8,7 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"TTS_PATH = \"/home/erogol/projects/\""
|
||||
"# TTS_PATH = \"/home/erogol/projects/\""
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -21,7 +21,6 @@
|
|||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"sys.path.append(TTS_PATH) # set this if TTS is not installed globally\n",
|
||||
"import librosa\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
|
@ -30,6 +29,8 @@
|
|||
"from multiprocessing import Pool\n",
|
||||
"from matplotlib import pylab as plt\n",
|
||||
"from collections import Counter\n",
|
||||
"from TTS.config.shared_configs import BaseDatasetConfig\n",
|
||||
"from TTS.tts.datasets import load_tts_samples\n",
|
||||
"from TTS.tts.datasets.formatters import *\n",
|
||||
"%matplotlib inline"
|
||||
]
|
||||
|
@ -42,22 +43,29 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DATA_PATH = \"/home/erogol/Data/m-ai-labs/de_DE/by_book/male/karlsson/\"\n",
|
||||
"META_DATA = [\"kleinzaches/metadata.csv\",\n",
|
||||
" \"spiegel_kaetzchen/metadata.csv\",\n",
|
||||
" \"herrnarnesschatz/metadata.csv\",\n",
|
||||
" \"maedchen_von_moorhof/metadata.csv\",\n",
|
||||
" \"koenigsgaukler/metadata.csv\",\n",
|
||||
" \"altehous/metadata.csv\",\n",
|
||||
" \"odysseus/metadata.csv\",\n",
|
||||
" \"undine/metadata.csv\",\n",
|
||||
" \"reise_tilsit/metadata.csv\",\n",
|
||||
" \"schmied_seines_glueckes/metadata.csv\",\n",
|
||||
" \"kammmacher/metadata.csv\",\n",
|
||||
" \"unterm_birnbaum/metadata.csv\",\n",
|
||||
" \"liebesbriefe/metadata.csv\",\n",
|
||||
" \"sandmann/metadata.csv\"]\n",
|
||||
"NUM_PROC = 8"
|
||||
"NUM_PROC = 8\n",
|
||||
"DATASET_CONFIG = BaseDatasetConfig(\n",
|
||||
" name=\"ljspeech\", meta_file_train=\"metadata.csv\", path=\"/home/ubuntu/TTS/depot/data/male_dataset1_44k/\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def formatter(root_path, meta_file, **kwargs): # pylint: disable=unused-argument\n",
|
||||
" txt_file = os.path.join(root_path, meta_file)\n",
|
||||
" items = []\n",
|
||||
" speaker_name = \"maledataset1\"\n",
|
||||
" with open(txt_file, \"r\", encoding=\"utf-8\") as ttf:\n",
|
||||
" for line in ttf:\n",
|
||||
" cols = line.split(\"|\")\n",
|
||||
" wav_file = os.path.join(root_path, \"wavs\", cols[0])\n",
|
||||
" text = cols[1]\n",
|
||||
" items.append([text, wav_file, speaker_name])\n",
|
||||
" return items"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -69,8 +77,10 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# use your own preprocessor at this stage - TTS/datasets/preprocess.py\n",
|
||||
"items = mailabs(DATA_PATH, META_DATA)\n",
|
||||
"print(\" > Number of audio files: {}\".format(len(items)))"
|
||||
"train_samples, eval_samples = load_tts_samples(DATASET_CONFIG, eval_split=True, formatter=formatter)\n",
|
||||
"items = train_samples + eval_samples\n",
|
||||
"print(\" > Number of audio files: {}\".format(len(items)))\n",
|
||||
"print(items[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -103,6 +113,15 @@
|
|||
"print([item for item, count in c.items() if count > 1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"item"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
@ -112,11 +131,9 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"def load_item(item):\n",
|
||||
" file_name = item[1].strip()\n",
|
||||
" text = item[0].strip()\n",
|
||||
" audio = librosa.load(file_name, sr=None)\n",
|
||||
" sr = audio[1]\n",
|
||||
" audio = audio[0]\n",
|
||||
" file_name = item[1].strip()\n",
|
||||
" audio, sr = librosa.load(file_name, sr=None)\n",
|
||||
" audio_len = len(audio) / sr\n",
|
||||
" text_len = len(text)\n",
|
||||
" return file_name, text, text_len, audio, audio_len\n",
|
||||
|
@ -374,11 +391,18 @@
|
|||
"# frequency bar plot - it takes time!!\n",
|
||||
"w_count_df.plot.bar()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -392,7 +416,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.5"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
Loading…
Reference in New Issue