mirror of https://github.com/coqui-ai/TTS.git
update CheckSpec notebook
parent
dc2954e393
commit
eca67ebe13
|
@ -2,9 +2,11 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
|
@ -14,18 +16,18 @@
|
|||
"\n",
|
||||
"import IPython.display as ipd\n",
|
||||
"import glob"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config_path = \"/home/erogol/Projects/TTS/tts/tts/config_thorsten_de.json\"\n",
|
||||
"data_path = \"/home/erogol/Data/thorsten-german/\"\n",
|
||||
"config_path = \"/home/erogol/gdrive/Projects/TTS/recipes/ljspeech/align_tts/config_transformer2.json\"\n",
|
||||
"data_path = \"/home/erogol/gdrive/Datasets/LJSpeech-1.1/\"\n",
|
||||
"\n",
|
||||
"file_paths = glob.glob(data_path + \"/**/*.wav\", recursive=True)\n",
|
||||
"CONFIG = load_config(config_path)\n",
|
||||
|
@ -37,9 +39,7 @@
|
|||
"\n",
|
||||
"print(\"File list, by index:\")\n",
|
||||
"dict(enumerate(file_paths))"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
@ -56,9 +56,11 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tune_params={\n",
|
||||
"# 'audio_processor': 'audio',\n",
|
||||
|
@ -80,7 +82,7 @@
|
|||
"# These options have to be forced off in order to avoid errors about the \n",
|
||||
"# pre-calculated stats not matching the options being tuned.\n",
|
||||
"reset={\n",
|
||||
" 'signal_norm': False,\n",
|
||||
" 'signal_norm': True, # check this if you want to test normalization parameters.\n",
|
||||
" 'stats_path': None,\n",
|
||||
" 'symmetric_norm': False,\n",
|
||||
" 'max_norm': 1,\n",
|
||||
|
@ -93,9 +95,7 @@
|
|||
"tuned_config.update(tune_params)\n",
|
||||
"\n",
|
||||
"AP = AudioProcessor(**tuned_config);"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
@ -108,15 +108,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wav = AP.load_wav(SAMPLE_FILE_PATH)\n",
|
||||
"ipd.Audio(data=wav, rate=AP.sample_rate) "
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
@ -129,29 +129,30 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"AP.power = 1.0"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"AP.power = 1.5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mel = AP.melspectrogram(wav)\n",
|
||||
"print(\"Max:\", mel.max())\n",
|
||||
"print(\"Min:\", mel.min())\n",
|
||||
"print(\"Mean:\", mel.mean())\n",
|
||||
"plot_spectrogram(mel.T, AP);\n",
|
||||
"plot_spectrogram(mel.T, AP, output_fig=True)\n",
|
||||
"\n",
|
||||
"wav_gen = AP.inv_melspectrogram(mel)\n",
|
||||
"ipd.Audio(wav_gen, rate=AP.sample_rate)"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
@ -164,21 +165,21 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spec = AP.spectrogram(wav)\n",
|
||||
"print(\"Max:\", spec.max())\n",
|
||||
"print(\"Min:\", spec.min())\n",
|
||||
"print(\"Mean:\", spec.mean())\n",
|
||||
"plot_spectrogram(spec.T, AP);\n",
|
||||
"plot_spectrogram(spec.T, AP, output_fig=True)\n",
|
||||
"\n",
|
||||
"wav_gen = AP.inv_spectrogram(spec)\n",
|
||||
"ipd.Audio(wav_gen, rate=AP.sample_rate)"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
@ -193,9 +194,11 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from librosa import display\n",
|
||||
"from matplotlib import pylab as plt\n",
|
||||
|
@ -235,31 +238,29 @@
|
|||
" val = values[idx]\n",
|
||||
" print(\" > {} = {}\".format(attribute, val))\n",
|
||||
" IPython.display.display(IPython.display.Audio(wav_gen, rate=AP.sample_rate))"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"compare_values(\"preemphasis\", [0, 0.5, 0.97, 0.98, 0.99])"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 100])"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
"compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 1000])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
Loading…
Reference in New Issue