diff --git a/.gitignore b/.gitignore index b0fe0bee..e1e9fbd4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +WadaSNR/ .idea/ *.pyc .DS_Store diff --git a/dataset_analysis/CheckDatasetSNR.ipynb b/dataset_analysis/CheckDatasetSNR.ipynb index f9676a38..5588cdd6 100644 --- a/dataset_analysis/CheckDatasetSNR.ipynb +++ b/dataset_analysis/CheckDatasetSNR.ipynb @@ -8,13 +8,19 @@ "\n", "To use this notebook, you need:\n", "- WADA SNR estimation: http://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/\n", + " 1. extract in the same folder as this notebook\n", + " 2. under MacOS you'll have to rebuild the executable. In the build folder: 1) remove existing .o files and 2) run make\n", + "\n", + "\n", "- FFMPEG: ```sudo apt-get install ffmpeg ``` \n" ] }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, + "execution_count": null, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import os, sys\n", @@ -25,13 +31,17 @@ "import soundfile as sf\n", "import numpy as np\n", "from tqdm import tqdm\n", - "from multiprocessing import Pool" + "from multiprocessing import Pool\n", + "from matplotlib import pylab as plt\n", + "%matplotlib inline" ] }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "execution_count": null, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Set the meta parameters\n", @@ -42,36 +52,37 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, + "execution_count": null, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def compute_file_snr(file_path):\n", - " \"\"\" Convert given file to required format with FFMPEG\n", - " and process with WADA\n", - " \"\"\"\n", + " \"\"\" Convert given file to required format with FFMPEG and process with WADA.\"\"\"\n", " _, sr = sf.read(file_path)\n", " new_file = file_path.replace(\".wav\", \"_tmp.wav\")\n", " if sr != 16000:\n", - " command = 'ffmpeg -i \"{}\" -ac 1 -acodec pcm_s16le -y -ar {} \"{}\"'.format(file_path, 16000, new_file)\n", + " command = f'ffmpeg -i \"{file_path}\" -ac 1 -acodec pcm_s16le -y -ar 16000 \"{new_file}\"'\n", " else:\n", - " command = f'cp {file_path} {new_file}'\n", + " command = f'cp \"{file_path}\" \"{new_file}\"'\n", " os.system(command)\n", - " command = [f\"{CURRENT_PATH}/WadaSNR/Exe/WADASNR\", f'-i {new_file}', f'-t {CURRENT_PATH}/WadaSNR/Exe/Alpha0.400000.txt', '-ifmt mswav']\n", - " pipe = subprocess.Popen(\" \".join(command), shell=True, stdout=subprocess.PIPE).stdout\n", - " output = pipe.read()\n", + " command = [f'\"{CURRENT_PATH}/WadaSNR/Exe/WADASNR\"', f'-i \"{new_file}\"', f'-t \"{CURRENT_PATH}/WadaSNR/Exe/Alpha0.400000.txt\"', '-ifmt mswav']\n", + " output = subprocess.check_output(\" \".join(command), shell=True)\n", " try:\n", " output = float(output.split()[-3].decode(\"utf-8\"))\n", " except:\n", " raise RuntimeError(\" \".join(command))\n", - " os.system(\"rm {}\".format(new_file))\n", + " os.system(f'rm \"{new_file}\"')\n", " return output, file_path\n" ] }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": null, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "wav_file = \"/home/erogol/Data/LJSpeech-1.1/wavs/LJ001-0001.wav\"\n", @@ -80,35 +91,19 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > Number of wav files 13331\n" - ] - } - ], + "outputs": [], "source": [ - "wav_files = glob.glob(\"{}/**/*.wav\".format(DATA_PATH), recursive=True)\n", - "print(\" > Number of wav files {}\".format(len(wav_files)))" + "wav_files = glob.glob(f\"{DATA_PATH}/**/*.wav\", recursive=True)\n", + "print(f\" > Number of wav files {len(wav_files)}\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "13331it [22:53, 9.71it/s]\n" - ] - } - ], + "outputs": [], "source": [ "if NUM_PROC == 1:\n", " file_snrs = [None] * len(wav_files) \n", @@ -122,17 +117,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > Average SNR of the dataset:65.62862835473709\n" - ] - } - ], + "outputs": [], "source": [ "snrs = [tup[0] for tup in file_snrs]\n", "\n", @@ -142,532 +129,79 @@ "file_snrs = [i for j, i in enumerate(file_snrs) if j not in error_idxs]\n", "file_names = [tup[1] for tup in file_snrs]\n", "snrs = [tup[0] for tup in file_snrs]\n", + "file_idxs = np.argsort(snrs)\n", "\n", - "print(\" > Average SNR of the dataset:{}\".format(np.mean(snrs)))" + "\n", + "print(f\" > Average SNR of the dataset:{np.mean(snrs)}\")" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def output_snr_with_audio(idx):\n", + " file_idx = file_idxs[idx]\n", + " file_name = file_names[file_idx]\n", + " wav, sr = sf.read(file_name)\n", + " # multi channel to single channel\n", + " if len(wav.shape) == 2:\n", + " wav = wav[:, 0]\n", + " print(f\" > {file_name} - snr:{snrs[file_idx]}\")\n", + " IPython.display.display(IPython.display.Audio(wav, rate=sr))" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000011.wav - snr:17.236514\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000105.wav - snr:17.620196\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000048.wav - snr:17.771109\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000084.wav - snr:18.852683\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000034.wav - snr:19.213945\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000101.wav - snr:19.526065\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000229.wav - snr:19.785003\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000033.wav - snr:19.811579\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_03_f000007.wav - snr:20.052098\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000031.wav - snr:20.055627\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# find worse SNR files\n", "N = 10 # number of files to fetch\n", - "file_idxs = np.argsort(snrs)\n", "for i in range(N):\n", - " idx = file_idxs[i]\n", - " file_name = file_names[idx]\n", - " wav, sr = sf.read(file_name)\n", - " print(\" > {} - snr:{}\".format(file_name, snrs[idx]))\n", - " IPython.display.display(IPython.display.Audio(wav, rate=sr))" + " output_snr_with_audio(i)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/kleine_lord/wavs/kleine_lord_04_f000156.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_15_f000107.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_26_f000133.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_33_f000102.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000200.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000193.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000048.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_04_f000145.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000006.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000108.wav - snr:100.0\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# find best recordings\n", "N = 10 # number of files to fetch\n", - "file_idxs = np.argsort(-1 * np.array(snrs))\n", "for i in range(N):\n", - " idx = file_idxs[i]\n", - " file_name = file_names[idx]\n", - " wav, sr = sf.read(file_name)\n", - " print(\" > {} - snr:{}\".format(file_name, snrs[idx]))\n", - " IPython.display.display(IPython.display.Audio(wav, rate=sr))" + " output_snr_with_audio(-i-1)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(snrs, bins=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3(mztts)", "language": "python", - "name": "python3" + "name": "mztts" }, "language_info": { "codemirror_mode": { @@ -679,7 +213,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.6.8" } }, "nbformat": 4,