diff --git a/notebooks/dataset_analysis/AnalyzeDataset.ipynb b/notebooks/dataset_analysis/AnalyzeDataset.ipynb index c2aabbf9..e08f3ab3 100644 --- a/notebooks/dataset_analysis/AnalyzeDataset.ipynb +++ b/notebooks/dataset_analysis/AnalyzeDataset.ipynb @@ -8,7 +8,7 @@ }, "outputs": [], "source": [ - "TTS_PATH = \"/home/erogol/projects/\"" + "# TTS_PATH = \"/home/erogol/projects/\"" ] }, { @@ -21,7 +21,6 @@ "source": [ "import os\n", "import sys\n", - "sys.path.append(TTS_PATH) # set this if TTS is not installed globally\n", "import librosa\n", "import numpy as np\n", "import pandas as pd\n", @@ -30,6 +29,8 @@ "from multiprocessing import Pool\n", "from matplotlib import pylab as plt\n", "from collections import Counter\n", + "from TTS.config.shared_configs import BaseDatasetConfig\n", + "from TTS.tts.datasets import load_tts_samples\n", "from TTS.tts.datasets.formatters import *\n", "%matplotlib inline" ] @@ -42,22 +43,29 @@ }, "outputs": [], "source": [ - "DATA_PATH = \"/home/erogol/Data/m-ai-labs/de_DE/by_book/male/karlsson/\"\n", - "META_DATA = [\"kleinzaches/metadata.csv\",\n", - " \"spiegel_kaetzchen/metadata.csv\",\n", - " \"herrnarnesschatz/metadata.csv\",\n", - " \"maedchen_von_moorhof/metadata.csv\",\n", - " \"koenigsgaukler/metadata.csv\",\n", - " \"altehous/metadata.csv\",\n", - " \"odysseus/metadata.csv\",\n", - " \"undine/metadata.csv\",\n", - " \"reise_tilsit/metadata.csv\",\n", - " \"schmied_seines_glueckes/metadata.csv\",\n", - " \"kammmacher/metadata.csv\",\n", - " \"unterm_birnbaum/metadata.csv\",\n", - " \"liebesbriefe/metadata.csv\",\n", - " \"sandmann/metadata.csv\"]\n", - "NUM_PROC = 8" + "NUM_PROC = 8\n", + "DATASET_CONFIG = BaseDatasetConfig(\n", + " name=\"ljspeech\", meta_file_train=\"metadata.csv\", path=\"/home/ubuntu/TTS/depot/data/male_dataset1_44k/\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def formatter(root_path, meta_file, **kwargs): # pylint: disable=unused-argument\n", + " txt_file = os.path.join(root_path, meta_file)\n", + " items = []\n", + " speaker_name = \"maledataset1\"\n", + " with open(txt_file, \"r\", encoding=\"utf-8\") as ttf:\n", + " for line in ttf:\n", + " cols = line.split(\"|\")\n", + " wav_file = os.path.join(root_path, \"wavs\", cols[0])\n", + " text = cols[1]\n", + " items.append([text, wav_file, speaker_name])\n", + " return items" ] }, { @@ -69,8 +77,10 @@ "outputs": [], "source": [ "# use your own preprocessor at this stage - TTS/datasets/proprocess.py\n", - "items = mailabs(DATA_PATH, META_DATA)\n", - "print(\" > Number of audio files: {}\".format(len(items)))" + "train_samples, eval_samples = load_tts_samples(DATASET_CONFIG, eval_split=True, formatter=formatter)\n", + "items = train_samples + eval_samples\n", + "print(\" > Number of audio files: {}\".format(len(items)))\n", + "print(items[1])" ] }, { @@ -103,6 +113,15 @@ "print([item for item, count in c.items() if count > 1])" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "item" + ] + }, { "cell_type": "code", "execution_count": null, @@ -112,11 +131,9 @@ "outputs": [], "source": [ "def load_item(item):\n", - " file_name = item[1].strip()\n", " text = item[0].strip()\n", - " audio = librosa.load(file_name, sr=None)\n", - " sr = audio[1]\n", - " audio = audio[0]\n", + " file_name = item[1].strip()\n", + " audio, sr = librosa.load(file_name, sr=None)\n", " audio_len = len(audio) / sr\n", " text_len = len(text)\n", " return file_name, text, text_len, audio, audio_len\n", @@ -374,11 +391,18 @@ "# fequency bar plot - it takes time!!\n", "w_count_df.plot.bar()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -392,7 +416,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.1" } }, "nbformat": 4,