mirror of https://github.com/coqui-ai/TTS.git
817 lines
34 KiB
Plaintext
817 lines
34 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Overview\n",
|
|
"\n",
|
|
"This notebook can be used with either a single-speaker or a multi-speaker corpus and allows interactive plotting of speaker embeddings linked to the underlying audio (see the instructions in the repo's speaker_embedding directory).\n",
|
|
"\n",
|
|
"Depending on the directory structure used for your corpus, you may need to adjust handling of **speaker_to_utter** and **locations**."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 2,
|
|
=======
|
|
"execution_count": null,
|
|
>>>>>>> dev
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import glob\n",
|
|
"import random\n",
|
|
"import numpy as np\n",
|
|
"import torch\n",
|
|
"import umap\n",
|
|
"\n",
|
|
"from TTS.speaker_encoder.model import SpeakerEncoder\n",
|
|
<<<<<<< HEAD
|
|
"from TTS.utils.audio import AudioProcessor\n",
|
|
"from TTS.utils.io import load_config\n",
|
|
=======
|
|
"from TTS.tts.utils.audio import AudioProcessor\n",
|
|
"from TTS.tts.utils.generic_utils import load_config\n",
|
|
>>>>>>> dev
|
|
"\n",
|
|
"from bokeh.io import output_notebook, show\n",
|
|
"from bokeh.plotting import figure\n",
|
|
"from bokeh.models import HoverTool, ColumnDataSource, BoxZoomTool, ResetTool, OpenURL, TapTool\n",
|
|
"from bokeh.transform import factor_cmap, factor_mark\n",
|
|
"from bokeh.palettes import Category10"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"For larger sets of speakers, you can use **Category20** instead of **Category10**, but you then need to import it and use it in the **pal** variable too\n",
|
|
"\n",
|
|
"List of Bokeh palettes here: http://docs.bokeh.org/en/1.4.0/docs/reference/palettes.html\n",
|
|
"\n",
|
|
"**NB:** if you have problems with other palettes, first see https://stackoverflow.com/questions/48333820/why-do-some-bokeh-palettes-raise-a-valueerror-when-used-in-factor-cmap"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"\n",
|
|
" <div class=\"bk-root\">\n",
|
|
" <a href=\"https://bokeh.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
|
|
" <span id=\"1001\">Loading BokehJS ...</span>\n",
|
|
" </div>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"application/javascript": [
|
|
"\n",
|
|
"(function(root) {\n",
|
|
" function now() {\n",
|
|
" return new Date();\n",
|
|
" }\n",
|
|
"\n",
|
|
" var force = true;\n",
|
|
"\n",
|
|
" if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n",
|
|
" root._bokeh_onload_callbacks = [];\n",
|
|
" root._bokeh_is_loading = undefined;\n",
|
|
" }\n",
|
|
"\n",
|
|
" var JS_MIME_TYPE = 'application/javascript';\n",
|
|
" var HTML_MIME_TYPE = 'text/html';\n",
|
|
" var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
|
|
" var CLASS_NAME = 'output_bokeh rendered_html';\n",
|
|
"\n",
|
|
" /**\n",
|
|
" * Render data to the DOM node\n",
|
|
" */\n",
|
|
" function render(props, node) {\n",
|
|
" var script = document.createElement(\"script\");\n",
|
|
" node.appendChild(script);\n",
|
|
" }\n",
|
|
"\n",
|
|
" /**\n",
|
|
" * Handle when an output is cleared or removed\n",
|
|
" */\n",
|
|
" function handleClearOutput(event, handle) {\n",
|
|
" var cell = handle.cell;\n",
|
|
"\n",
|
|
" var id = cell.output_area._bokeh_element_id;\n",
|
|
" var server_id = cell.output_area._bokeh_server_id;\n",
|
|
" // Clean up Bokeh references\n",
|
|
" if (id != null && id in Bokeh.index) {\n",
|
|
" Bokeh.index[id].model.document.clear();\n",
|
|
" delete Bokeh.index[id];\n",
|
|
" }\n",
|
|
"\n",
|
|
" if (server_id !== undefined) {\n",
|
|
" // Clean up Bokeh references\n",
|
|
" var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
|
|
" cell.notebook.kernel.execute(cmd, {\n",
|
|
" iopub: {\n",
|
|
" output: function(msg) {\n",
|
|
" var id = msg.content.text.trim();\n",
|
|
" if (id in Bokeh.index) {\n",
|
|
" Bokeh.index[id].model.document.clear();\n",
|
|
" delete Bokeh.index[id];\n",
|
|
" }\n",
|
|
" }\n",
|
|
" }\n",
|
|
" });\n",
|
|
" // Destroy server and session\n",
|
|
" var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
|
|
" cell.notebook.kernel.execute(cmd);\n",
|
|
" }\n",
|
|
" }\n",
|
|
"\n",
|
|
" /**\n",
|
|
" * Handle when a new output is added\n",
|
|
" */\n",
|
|
" function handleAddOutput(event, handle) {\n",
|
|
" var output_area = handle.output_area;\n",
|
|
" var output = handle.output;\n",
|
|
"\n",
|
|
" // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
|
|
" if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
|
|
" return\n",
|
|
" }\n",
|
|
"\n",
|
|
" var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
|
|
"\n",
|
|
" if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
|
|
" toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
|
|
" // store reference to embed id on output_area\n",
|
|
" output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
|
|
" }\n",
|
|
" if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
|
|
" var bk_div = document.createElement(\"div\");\n",
|
|
" bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
|
|
" var script_attrs = bk_div.children[0].attributes;\n",
|
|
" for (var i = 0; i < script_attrs.length; i++) {\n",
|
|
" toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
|
|
" }\n",
|
|
" // store reference to server id on output_area\n",
|
|
" output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
|
|
" }\n",
|
|
" }\n",
|
|
"\n",
|
|
" function register_renderer(events, OutputArea) {\n",
|
|
"\n",
|
|
" function append_mime(data, metadata, element) {\n",
|
|
" // create a DOM node to render to\n",
|
|
" var toinsert = this.create_output_subarea(\n",
|
|
" metadata,\n",
|
|
" CLASS_NAME,\n",
|
|
" EXEC_MIME_TYPE\n",
|
|
" );\n",
|
|
" this.keyboard_manager.register_events(toinsert);\n",
|
|
" // Render to node\n",
|
|
" var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
|
|
" render(props, toinsert[toinsert.length - 1]);\n",
|
|
" element.append(toinsert);\n",
|
|
" return toinsert\n",
|
|
" }\n",
|
|
"\n",
|
|
" /* Handle when an output is cleared or removed */\n",
|
|
" events.on('clear_output.CodeCell', handleClearOutput);\n",
|
|
" events.on('delete.Cell', handleClearOutput);\n",
|
|
"\n",
|
|
" /* Handle when a new output is added */\n",
|
|
" events.on('output_added.OutputArea', handleAddOutput);\n",
|
|
"\n",
|
|
" /**\n",
|
|
" * Register the mime type and append_mime function with output_area\n",
|
|
" */\n",
|
|
" OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
|
|
" /* Is output safe? */\n",
|
|
" safe: true,\n",
|
|
" /* Index of renderer in `output_area.display_order` */\n",
|
|
" index: 0\n",
|
|
" });\n",
|
|
" }\n",
|
|
"\n",
|
|
" // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
|
|
" if (root.Jupyter !== undefined) {\n",
|
|
" var events = require('base/js/events');\n",
|
|
" var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
|
|
"\n",
|
|
" if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
|
|
" register_renderer(events, OutputArea);\n",
|
|
" }\n",
|
|
" }\n",
|
|
"\n",
|
|
" \n",
|
|
" if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
|
|
" root._bokeh_timeout = Date.now() + 5000;\n",
|
|
" root._bokeh_failed_load = false;\n",
|
|
" }\n",
|
|
"\n",
|
|
" var NB_LOAD_WARNING = {'data': {'text/html':\n",
|
|
" \"<div style='background-color: #fdd'>\\n\"+\n",
|
|
" \"<p>\\n\"+\n",
|
|
" \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
|
|
" \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
|
|
" \"</p>\\n\"+\n",
|
|
" \"<ul>\\n\"+\n",
|
|
" \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
|
|
" \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
|
|
" \"</ul>\\n\"+\n",
|
|
" \"<code>\\n\"+\n",
|
|
" \"from bokeh.resources import INLINE\\n\"+\n",
|
|
" \"output_notebook(resources=INLINE)\\n\"+\n",
|
|
" \"</code>\\n\"+\n",
|
|
" \"</div>\"}};\n",
|
|
"\n",
|
|
" function display_loaded() {\n",
|
|
" var el = document.getElementById(\"1001\");\n",
|
|
" if (el != null) {\n",
|
|
" el.textContent = \"BokehJS is loading...\";\n",
|
|
" }\n",
|
|
" if (root.Bokeh !== undefined) {\n",
|
|
" if (el != null) {\n",
|
|
" el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
|
|
" }\n",
|
|
" } else if (Date.now() < root._bokeh_timeout) {\n",
|
|
" setTimeout(display_loaded, 100)\n",
|
|
" }\n",
|
|
" }\n",
|
|
"\n",
|
|
"\n",
|
|
" function run_callbacks() {\n",
|
|
" try {\n",
|
|
" root._bokeh_onload_callbacks.forEach(function(callback) {\n",
|
|
" if (callback != null)\n",
|
|
" callback();\n",
|
|
" });\n",
|
|
" } finally {\n",
|
|
" delete root._bokeh_onload_callbacks\n",
|
|
" }\n",
|
|
" console.debug(\"Bokeh: all callbacks have finished\");\n",
|
|
" }\n",
|
|
"\n",
|
|
" function load_libs(css_urls, js_urls, callback) {\n",
|
|
" if (css_urls == null) css_urls = [];\n",
|
|
" if (js_urls == null) js_urls = [];\n",
|
|
"\n",
|
|
" root._bokeh_onload_callbacks.push(callback);\n",
|
|
" if (root._bokeh_is_loading > 0) {\n",
|
|
" console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
|
|
" return null;\n",
|
|
" }\n",
|
|
" if (js_urls == null || js_urls.length === 0) {\n",
|
|
" run_callbacks();\n",
|
|
" return null;\n",
|
|
" }\n",
|
|
" console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
|
|
" root._bokeh_is_loading = css_urls.length + js_urls.length;\n",
|
|
"\n",
|
|
" function on_load() {\n",
|
|
" root._bokeh_is_loading--;\n",
|
|
" if (root._bokeh_is_loading === 0) {\n",
|
|
" console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n",
|
|
" run_callbacks()\n",
|
|
" }\n",
|
|
" }\n",
|
|
"\n",
|
|
" function on_error() {\n",
|
|
" console.error(\"failed to load \" + url);\n",
|
|
" }\n",
|
|
"\n",
|
|
" for (var i = 0; i < css_urls.length; i++) {\n",
|
|
" var url = css_urls[i];\n",
|
|
" const element = document.createElement(\"link\");\n",
|
|
" element.onload = on_load;\n",
|
|
" element.onerror = on_error;\n",
|
|
" element.rel = \"stylesheet\";\n",
|
|
" element.type = \"text/css\";\n",
|
|
" element.href = url;\n",
|
|
" console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n",
|
|
" document.body.appendChild(element);\n",
|
|
" }\n",
|
|
"\n",
|
|
" for (var i = 0; i < js_urls.length; i++) {\n",
|
|
" var url = js_urls[i];\n",
|
|
" var element = document.createElement('script');\n",
|
|
" element.onload = on_load;\n",
|
|
" element.onerror = on_error;\n",
|
|
" element.async = false;\n",
|
|
" element.src = url;\n",
|
|
" console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
|
|
" document.head.appendChild(element);\n",
|
|
" }\n",
|
|
" };var element = document.getElementById(\"1001\");\n",
|
|
" if (element == null) {\n",
|
|
" console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n",
|
|
" return false;\n",
|
|
" }\n",
|
|
"\n",
|
|
" function inject_raw_css(css) {\n",
|
|
" const element = document.createElement(\"style\");\n",
|
|
" element.appendChild(document.createTextNode(css));\n",
|
|
" document.body.appendChild(element);\n",
|
|
" }\n",
|
|
"\n",
|
|
" \n",
|
|
" var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n",
|
|
" var css_urls = [];\n",
|
|
" \n",
|
|
"\n",
|
|
" var inline_js = [\n",
|
|
" function(Bokeh) {\n",
|
|
" Bokeh.set_log_level(\"info\");\n",
|
|
" },\n",
|
|
" function(Bokeh) {\n",
|
|
" \n",
|
|
" \n",
|
|
" }\n",
|
|
" ];\n",
|
|
"\n",
|
|
" function run_inline_js() {\n",
|
|
" \n",
|
|
" if (root.Bokeh !== undefined || force === true) {\n",
|
|
" \n",
|
|
" for (var i = 0; i < inline_js.length; i++) {\n",
|
|
" inline_js[i].call(root, root.Bokeh);\n",
|
|
" }\n",
|
|
" if (force === true) {\n",
|
|
" display_loaded();\n",
|
|
" }} else if (Date.now() < root._bokeh_timeout) {\n",
|
|
" setTimeout(run_inline_js, 100);\n",
|
|
" } else if (!root._bokeh_failed_load) {\n",
|
|
" console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
|
|
" root._bokeh_failed_load = true;\n",
|
|
" } else if (force !== true) {\n",
|
|
" var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n",
|
|
" cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
|
|
" }\n",
|
|
"\n",
|
|
" }\n",
|
|
"\n",
|
|
" if (root._bokeh_is_loading === 0) {\n",
|
|
" console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
|
|
" run_inline_js();\n",
|
|
" } else {\n",
|
|
" load_libs(css_urls, js_urls, function() {\n",
|
|
" console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n",
|
|
" run_inline_js();\n",
|
|
" });\n",
|
|
" }\n",
|
|
"}(window));"
|
|
],
|
|
"application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"<div style='background-color: #fdd'>\\n\"+\n \"<p>\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"</p>\\n\"+\n \"<ul>\\n\"+\n \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n \"<li>use INLINE resources instead, as so:</li>\\n\"+\n \"</ul>\\n\"+\n \"<code>\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"</code>\\n\"+\n \"</div>\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"1001\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", 
now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };var element = document.getElementById(\"1001\");\n if (element == null) {\n console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. 
\")\n return false;\n }\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));"
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
>>>>>>> dev
|
|
"source": [
|
|
"output_notebook()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"You should also adjust all the path constants so that they point at the relevant locations on your local machine"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#MODEL_RUN_PATH = \"libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n",
|
|
"MODEL_RUN_PATH = \"libritts_360-half-September-28-2019_10+46AM-8565c50/\"\n",
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"MODEL_RUN_PATH = \"/media/erogol/data_ssd/Models/libri_tts/speaker_encoder/libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n",
|
|
>>>>>>> dev
|
|
"MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n",
|
|
"CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n",
|
|
"\n",
|
|
"# My single speaker locations\n",
|
|
"#EMBED_PATH = \"/home/neil/main/Projects/TTS3/embeddings/neil14/\"\n",
|
|
"#AUDIO_PATH = \"/home/neil/data/Projects/NeilTTS/neil14/wavs/\"\n",
|
|
"\n",
|
|
"# My multi speaker locations\n",
|
|
"EMBED_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360-embed_128/\"\n",
|
|
<<<<<<< HEAD
|
|
"AUDIO_PATH = \"datasets/LibriTTS/test-clean/\""
|
|
=======
|
|
"AUDIO_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360/\""
|
|
>>>>>>> dev
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"best_model.pth.tar\r\n",
|
|
"config.json\r\n",
|
|
"events.out.tfevents.1569660396.erogol-desktop\r\n"
|
|
]
|
|
}
|
|
],
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
>>>>>>> dev
|
|
"source": [
|
|
"!ls -1 $MODEL_RUN_PATH"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" > Setting up Audio Processor...\n",
|
|
" | > sample_rate:16000\n",
|
|
" | > num_mels:40\n",
|
|
" | > min_level_db:-100\n",
|
|
" | > frame_shift_ms:12.5\n",
|
|
" | > frame_length_ms:50\n",
|
|
" | > ref_level_db:20\n",
|
|
" | > fft_size:1024\n",
|
|
" | > power:None\n",
|
|
" | > preemphasis:0.98\n",
|
|
" | > griffin_lim_iters:None\n",
|
|
" | > signal_norm:True\n",
|
|
" | > symmetric_norm:True\n",
|
|
" | > mel_fmin:0\n",
|
|
" | > mel_fmax:8000.0\n",
|
|
" | > spec_gain:20.0\n",
|
|
" | > stft_pad_mode:reflect\n",
|
|
" | > max_norm:4.0\n",
|
|
" | > clip_norm:True\n",
|
|
" | > do_trim_silence:False\n",
|
|
" | > trim_db:60\n",
|
|
" | > do_sound_norm:False\n",
|
|
" | > stats_path:None\n",
|
|
" | > hop_length:200\n",
|
|
" | > win_length:800\n"
|
|
]
|
|
}
|
|
],
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
>>>>>>> dev
|
|
"source": [
|
|
"CONFIG = load_config(CONFIG_PATH)\n",
|
|
"ap = AudioProcessor(**CONFIG['audio'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Bring in the embeddings created by **compute_embeddings.py**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Embeddings found: 0\n"
|
|
]
|
|
}
|
|
],
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
>>>>>>> dev
|
|
"source": [
|
|
"embed_files = glob.glob(EMBED_PATH+\"/**/*.npy\", recursive=True)\n",
|
|
"print(f'Embeddings found: {len(embed_files)}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Check that we did indeed find an embedding"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "IndexError",
|
|
"evalue": "list index out of range",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[0;32m<ipython-input-8-f67d64b1abbb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0membed_files\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
|
"\u001b[0;31mIndexError\u001b[0m: list index out of range"
|
|
]
|
|
}
|
|
],
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
>>>>>>> dev
|
|
"source": [
|
|
"embed_files[0]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Process the speakers\n",
|
|
"\n",
|
|
"Assumes the count of **speaker_paths** corresponds to the number of speakers (so a corpus in just one directory is treated as a single speaker, and the multiple directories of LibriTTS are treated as distinct speakers)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Speaker count: 0\n"
|
|
]
|
|
}
|
|
],
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
>>>>>>> dev
|
|
"source": [
|
|
"speaker_paths = list(set([os.path.dirname(os.path.dirname(embed_file)) for embed_file in embed_files]))\n",
|
|
"speaker_to_utter = {}\n",
|
|
"for embed_file in embed_files:\n",
|
|
" speaker_path = os.path.dirname(os.path.dirname(embed_file))\n",
|
|
" try:\n",
|
|
" speaker_to_utter[speaker_path].append(embed_file)\n",
|
|
" except:\n",
|
|
" speaker_to_utter[speaker_path]=[embed_file]\n",
|
|
"print(f'Speaker count: {len(speaker_paths)}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Set up the embeddings\n",
|
|
"\n",
|
|
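"Adjust the number of speakers to select and the number of utterances to take from each speaker; both are randomly sampled from the corpus. Speakers are drawn without replacement, so **num_speakers** must not exceed the number of speakers found above, while utterances are drawn with replacement, so the same utterance may appear more than once"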
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "ValueError",
|
|
"evalue": "'a' cannot be empty unless no samples are taken",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[0;32m<ipython-input-11-aabd2a5031f8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mspeaker_idxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_paths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_speakers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mspeaker_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspeaker_idx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_idxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0;32mmtrand.pyx\u001b[0m in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n",
|
|
"\u001b[0;31mValueError\u001b[0m: 'a' cannot be empty unless no samples are taken"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"ttsembeds = []\n",
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"embeds = []\n",
|
|
>>>>>>> dev
|
|
"labels = []\n",
|
|
"locations = []\n",
|
|
"\n",
|
|
"# single speaker \n",
|
|
"#num_speakers = 1\n",
|
|
"#num_utters = 1000\n",
|
|
"\n",
|
|
"# multi speaker\n",
|
|
"num_speakers = 10\n",
|
|
"num_utters = 20\n",
|
|
"\n",
|
|
"\n",
|
|
"speaker_idxs = np.random.choice(range(len(speaker_paths)), num_speakers, replace=False )\n",
|
|
"\n",
|
|
"for speaker_num, speaker_idx in enumerate(speaker_idxs):\n",
|
|
" speaker_path = speaker_paths[speaker_idx]\n",
|
|
" speakers_utter = speaker_to_utter[speaker_path]\n",
|
|
" utter_idxs = np.random.randint(0, len(speakers_utter) , num_utters)\n",
|
|
" for utter_idx in utter_idxs:\n",
|
|
" embed_path = speaker_to_utter[speaker_path][utter_idx]\n",
|
|
" embed = np.load(embed_path)\n",
|
|
" embeds.append(embed)\n",
|
|
" labels.append(str(speaker_num))\n",
|
|
<<<<<<< HEAD
|
|
" #locations.append(embed_path.replace(EMBED_PATH, '').replace('.npy','.wav'))\n",
|
|
=======
|
|
" locations.append(embed_path.replace(EMBED_PATH, '').replace('.npy','.wav'))\n",
|
|
>>>>>>> dev
|
|
"embeds = np.concatenate(embeds)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Reduce the dimensionality of the embeddings with UMAP so that they can be plotted"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
<<<<<<< HEAD
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "AttributeError",
|
|
"evalue": "module 'umap' has no attribute 'UMAP'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[0;32m<ipython-input-12-32709017067f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mumap\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUMAP\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprojection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0;31mAttributeError\u001b[0m: module 'umap' has no attribute 'UMAP'"
|
|
]
|
|
}
|
|
],
|
|
=======
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
>>>>>>> dev
|
|
"source": [
|
|
"model = umap.UMAP()\n",
|
|
"projection = model.fit_transform(embeds)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Interactively charting the data in Bokeh\n",
|
|
"\n",
|
|
"Set up various details for Bokeh to plot the data\n",
|
|
"\n",
|
|
"You can use the regular Bokeh [tools](http://docs.bokeh.org/en/1.4.0/docs/user_guide/tools.html?highlight=tools) to explore the data, with reset setting it back to normal\n",
|
|
"\n",
|
|
"Once you have started the local server (see cell below) you can then click on plotted points which will open a tab to play the audio for that point, enabling easy exploration of your corpus\n",
|
|
"\n",
|
|
"File location in the tooltip is given relative to **AUDIO_PATH**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"source_wav_stems = ColumnDataSource(\n",
|
|
" data=dict(\n",
|
|
" x = projection.T[0].tolist(),\n",
|
|
" y = projection.T[1].tolist(),\n",
|
|
" desc=locations,\n",
|
|
" label=labels\n",
|
|
" )\n",
|
|
" )\n",
|
|
"\n",
|
|
"hover = HoverTool(\n",
|
|
" tooltips=[\n",
|
|
" (\"file\", \"@desc\"),\n",
|
|
" (\"speaker\", \"@label\"),\n",
|
|
" ]\n",
|
|
" )\n",
|
|
"\n",
|
|
"# optionally consider adding these to the tooltips if you want additional detail\n",
|
|
"# for the coordinates: (\"(x,y)\", \"($x, $y)\"),\n",
|
|
"# for the index of the embedding / wav file: (\"index\", \"$index\"),\n",
|
|
"\n",
|
|
"factors = list(set(labels))\n",
|
|
"pal_size = max(len(factors), 3)\n",
|
|
"pal = Category10[pal_size]\n",
|
|
"\n",
|
|
"p = figure(plot_width=600, plot_height=400, tools=[hover,BoxZoomTool(), ResetTool(), TapTool()])\n",
|
|
"\n",
|
|
"\n",
|
|
"p.circle('x', 'y', source=source_wav_stems, color=factor_cmap('label', palette=pal, factors=factors),)\n",
|
|
"\n",
|
|
"url = \"http://localhost:8000/@desc\"\n",
|
|
"taptool = p.select(type=TapTool)\n",
|
|
"taptool.callback = OpenURL(url=url)\n",
|
|
"\n",
|
|
"show(p)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Local server to serve wav files from corpus\n",
|
|
"\n",
|
|
"This is required so that, when you click on a data point, the audio file behind its hyperlink is served from your local machine.\n",
|
|
"\n",
|
|
"There are other ways to serve this if you prefer and you can also run the commands manually on the command line\n",
|
|
"\n",
|
|
"The server will continue to run until stopped. To stop it, simply interrupt the kernel (i.e. the square button, or via the Kernel menu)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%cd $AUDIO_PATH\n",
|
|
"%pwd\n",
|
|
"!python -m http.server"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
<<<<<<< HEAD
|
|
"version": "3.8.5"
|
|
=======
|
|
"version": "3.7.4"
|
|
>>>>>>> dev
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|