update CheckSpec notebook

2021-03-24 12:52:56 +01:00 · 2021-03-24 12:52:56 +01:00 · eca67ebe13
parent dc2954e393
commit eca67ebe13
1 changed files with 38 additions and 37 deletions
--- a/notebooks/dataset_analysis/CheckSpectrograms.ipynb
+++ b/notebooks/dataset_analysis/CheckSpectrograms.ipynb
@ -2,9 +2,11 @@
 "cells": [
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
@ -14,18 +16,18 @@
    "\n",
    "import IPython.display as ipd\n",
    "import glob"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
-    "config_path = \"/home/erogol/Projects/TTS/tts/tts/config_thorsten_de.json\"\n",
-    "data_path = \"/home/erogol/Data/thorsten-german/\"\n",
+    "config_path = \"/home/erogol/gdrive/Projects/TTS/recipes/ljspeech/align_tts/config_transformer2.json\"\n",
+    "data_path = \"/home/erogol/gdrive/Datasets/LJSpeech-1.1/\"\n",
    "\n",
    "file_paths = glob.glob(data_path + \"/**/*.wav\", recursive=True)\n",
    "CONFIG = load_config(config_path)\n",
@ -37,9 +39,7 @@
    "\n",
    "print(\"File list, by index:\")\n",
    "dict(enumerate(file_paths))"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "markdown",
@ -56,9 +56,11 @@
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
    "tune_params={\n",
    "#  'audio_processor': 'audio',\n",
@ -80,7 +82,7 @@
    "# These options have to be forced off in order to avoid errors about the \n",
    "# pre-calculated stats not matching the options being tuned.\n",
    "reset={\n",
-    " 'signal_norm': False,\n",
+    " 'signal_norm': True,  # enable this if you want to test normalization parameters.\n",
    " 'stats_path': None,\n",
    " 'symmetric_norm': False,\n",
    " 'max_norm': 1,\n",
@ -93,9 +95,7 @@
    "tuned_config.update(tune_params)\n",
    "\n",
    "AP = AudioProcessor(**tuned_config);"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "markdown",
@ -108,15 +108,15 @@
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
    "wav = AP.load_wav(SAMPLE_FILE_PATH)\n",
    "ipd.Audio(data=wav, rate=AP.sample_rate) "
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "markdown",
@ -129,29 +129,30 @@
  },
  {
   "cell_type": "code",
-   "source": [
-    "AP.power = 1.0"
-   ],
   "execution_count": null,
-   "outputs": []
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "AP.power = 1.5"
+   ]
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
    "mel = AP.melspectrogram(wav)\n",
    "print(\"Max:\", mel.max())\n",
    "print(\"Min:\", mel.min())\n",
    "print(\"Mean:\", mel.mean())\n",
-    "plot_spectrogram(mel.T, AP);\n",
+    "plot_spectrogram(mel.T, AP, output_fig=True)\n",
    "\n",
    "wav_gen = AP.inv_melspectrogram(mel)\n",
    "ipd.Audio(wav_gen, rate=AP.sample_rate)"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "markdown",
@ -164,21 +165,21 @@
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
    "spec = AP.spectrogram(wav)\n",
    "print(\"Max:\", spec.max())\n",
    "print(\"Min:\", spec.min())\n",
    "print(\"Mean:\", spec.mean())\n",
-    "plot_spectrogram(spec.T, AP);\n",
+    "plot_spectrogram(spec.T, AP, output_fig=True)\n",
    "\n",
    "wav_gen = AP.inv_spectrogram(spec)\n",
    "ipd.Audio(wav_gen, rate=AP.sample_rate)"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "markdown",
@ -193,9 +194,11 @@
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
    "from librosa import display\n",
    "from matplotlib import pylab as plt\n",
@ -235,31 +238,29 @@
    "        val = values[idx]\n",
    "        print(\" > {} = {}\".format(attribute, val))\n",
    "        IPython.display.display(IPython.display.Audio(wav_gen, rate=AP.sample_rate))"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
    "compare_values(\"preemphasis\", [0, 0.5, 0.97, 0.98, 0.99])"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
  },
  {
   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "Collapsed": "false"
   },
+   "outputs": [],
   "source": [
-    "compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 100])"
-   ],
-   "execution_count": null,
-   "outputs": []
+    "compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 1000])"
+   ]
  }
 ],
 "metadata": {