diff --git a/.gitignore b/.gitignore
index b0fe0bee..e1e9fbd4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+WadaSNR/
.idea/
*.pyc
.DS_Store
diff --git a/dataset_analysis/CheckDatasetSNR.ipynb b/dataset_analysis/CheckDatasetSNR.ipynb
index f9676a38..5588cdd6 100644
--- a/dataset_analysis/CheckDatasetSNR.ipynb
+++ b/dataset_analysis/CheckDatasetSNR.ipynb
@@ -8,13 +8,19 @@
"\n",
"To use this notebook, you need:\n",
"- WADA SNR estimation: http://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/\n",
+ " 1. extract the archive into the same folder as this notebook (the code below looks for `WadaSNR/Exe/WADASNR`)\n",
+ " 2. on macOS you will have to rebuild the executable: in the build folder, remove the existing `.o` files, then run `make`\n",
+ "\n",
+ "\n",
"- FFMPEG: ```sudo apt-get install ffmpeg ``` \n"
]
},
{
"cell_type": "code",
- "execution_count": 1,
- "metadata": {},
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"import os, sys\n",
@@ -25,13 +31,17 @@
"import soundfile as sf\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
- "from multiprocessing import Pool"
+ "from multiprocessing import Pool\n",
+ "from matplotlib import pylab as plt\n",
+ "%matplotlib inline"
]
},
{
"cell_type": "code",
- "execution_count": 2,
- "metadata": {},
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"# Set the meta parameters\n",
@@ -42,36 +52,37 @@
},
{
"cell_type": "code",
- "execution_count": 3,
- "metadata": {},
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"def compute_file_snr(file_path):\n",
- " \"\"\" Convert given file to required format with FFMPEG\n",
- " and process with WADA\n",
- " \"\"\"\n",
+ " \"\"\" Convert given file to required format with FFMPEG and process with WADA.\"\"\"\n",
" _, sr = sf.read(file_path)\n",
" new_file = file_path.replace(\".wav\", \"_tmp.wav\")\n",
" if sr != 16000:\n",
- " command = 'ffmpeg -i \"{}\" -ac 1 -acodec pcm_s16le -y -ar {} \"{}\"'.format(file_path, 16000, new_file)\n",
+ " command = f'ffmpeg -i \"{file_path}\" -ac 1 -acodec pcm_s16le -y -ar 16000 \"{new_file}\"'\n",
" else:\n",
- " command = f'cp {file_path} {new_file}'\n",
+ " command = f'cp \"{file_path}\" \"{new_file}\"'\n",
" os.system(command)\n",
- " command = [f\"{CURRENT_PATH}/WadaSNR/Exe/WADASNR\", f'-i {new_file}', f'-t {CURRENT_PATH}/WadaSNR/Exe/Alpha0.400000.txt', '-ifmt mswav']\n",
- " pipe = subprocess.Popen(\" \".join(command), shell=True, stdout=subprocess.PIPE).stdout\n",
- " output = pipe.read()\n",
+ " command = [f'\"{CURRENT_PATH}/WadaSNR/Exe/WADASNR\"', f'-i \"{new_file}\"', f'-t \"{CURRENT_PATH}/WadaSNR/Exe/Alpha0.400000.txt\"', '-ifmt mswav']\n",
+ " output = subprocess.check_output(\" \".join(command), shell=True)\n",
" try:\n",
" output = float(output.split()[-3].decode(\"utf-8\"))\n",
" except:\n",
" raise RuntimeError(\" \".join(command))\n",
- " os.system(\"rm {}\".format(new_file))\n",
+ " os.system(f'rm \"{new_file}\"')\n",
" return output, file_path\n"
]
},
{
"cell_type": "code",
- "execution_count": 4,
- "metadata": {},
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"wav_file = \"/home/erogol/Data/LJSpeech-1.1/wavs/LJ001-0001.wav\"\n",
@@ -80,35 +91,19 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > Number of wav files 13331\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "wav_files = glob.glob(\"{}/**/*.wav\".format(DATA_PATH), recursive=True)\n",
- "print(\" > Number of wav files {}\".format(len(wav_files)))"
+ "wav_files = glob.glob(f\"{DATA_PATH}/**/*.wav\", recursive=True)\n",
+ "print(f\" > Number of wav files {len(wav_files)}\")"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "13331it [22:53, 9.71it/s]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"if NUM_PROC == 1:\n",
" file_snrs = [None] * len(wav_files) \n",
@@ -122,17 +117,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > Average SNR of the dataset:65.62862835473709\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"snrs = [tup[0] for tup in file_snrs]\n",
"\n",
@@ -142,532 +129,79 @@
"file_snrs = [i for j, i in enumerate(file_snrs) if j not in error_idxs]\n",
"file_names = [tup[1] for tup in file_snrs]\n",
"snrs = [tup[0] for tup in file_snrs]\n",
+ "file_idxs = np.argsort(snrs)\n",
"\n",
- "print(\" > Average SNR of the dataset:{}\".format(np.mean(snrs)))"
+ "\n",
+ "print(f\" > Average SNR of the dataset:{np.mean(snrs)}\")"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def output_snr_with_audio(idx):\n",
+ " file_idx = file_idxs[idx]\n",
+ " file_name = file_names[file_idx]\n",
+ " wav, sr = sf.read(file_name)\n",
+ " # multi channel to single channel\n",
+ " if len(wav.shape) == 2:\n",
+ " wav = wav[:, 0]\n",
+ " print(f\" > {file_name} - snr:{snrs[file_idx]}\")\n",
+ " IPython.display.display(IPython.display.Audio(wav, rate=sr))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000011.wav - snr:17.236514\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000105.wav - snr:17.620196\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000048.wav - snr:17.771109\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000084.wav - snr:18.852683\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000034.wav - snr:19.213945\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000101.wav - snr:19.526065\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000229.wav - snr:19.785003\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000033.wav - snr:19.811579\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_03_f000007.wav - snr:20.052098\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000031.wav - snr:20.055627\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# find worse SNR files\n",
"N = 10 # number of files to fetch\n",
- "file_idxs = np.argsort(snrs)\n",
"for i in range(N):\n",
- " idx = file_idxs[i]\n",
- " file_name = file_names[idx]\n",
- " wav, sr = sf.read(file_name)\n",
- " print(\" > {} - snr:{}\".format(file_name, snrs[idx]))\n",
- " IPython.display.display(IPython.display.Audio(wav, rate=sr))"
+ " output_snr_with_audio(i)"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/kleine_lord/wavs/kleine_lord_04_f000156.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_15_f000107.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_26_f000133.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_33_f000102.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000200.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000193.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000048.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_04_f000145.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000006.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000108.wav - snr:100.0\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# find best recordings\n",
"N = 10 # number of files to fetch\n",
- "file_idxs = np.argsort(-1 * np.array(snrs))\n",
"for i in range(N):\n",
- " idx = file_idxs[i]\n",
- " file_name = file_names[idx]\n",
- " wav, sr = sf.read(file_name)\n",
- " print(\" > {} - snr:{}\".format(file_name, snrs[idx]))\n",
- " IPython.display.display(IPython.display.Audio(wav, rate=sr))"
+ " output_snr_with_audio(-i-1)"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.hist(snrs, bins=100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3(mztts)",
"language": "python",
- "name": "python3"
+ "name": "mztts"
},
"language_info": {
"codemirror_mode": {
@@ -679,7 +213,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.2"
+ "version": "3.6.8"
}
},
"nbformat": 4,