Merge pull request #50 from keithito/trim-audio

Trim audio at the first silence
pull/2/head
Keith Ito 2017-09-26 09:53:14 -07:00 committed by GitHub
commit 9fc433a870
4 changed files with 14 additions and 10 deletions

View File

@ -86,7 +86,6 @@ if __name__ == '__main__':
help='Hyperparameter overrides as a comma-separated list of name=value pairs')
args = parser.parse_args()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
hparams.max_iters = 100
hparams.parse(args.hparams)
print(hparams_debug_string())
synthesizer.load(args.checkpoint)

View File

@ -45,7 +45,6 @@ def main():
help='Hyperparameter overrides as a comma-separated list of name=value pairs')
args = parser.parse_args()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
hparams.max_iters = 100
hparams.parse(args.hparams)
run_eval(args)

View File

@ -2,6 +2,7 @@ import io
import numpy as np
import tensorflow as tf
from hparams import hparams
from librosa import effects
from models import create_model
from text import text_to_sequence
from util import audio
@ -32,6 +33,8 @@ class Synthesizer:
self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32)
}
wav = self.session.run(self.wav_output, feed_dict=feed_dict)
wav = audio.inv_preemphasis(wav)
wav = wav[:audio.find_endpoint(wav)]
out = io.BytesIO()
audio.save_wav(audio.inv_preemphasis(wav), out)
audio.save_wav(wav, out)
return out.getvalue()

View File

@ -52,6 +52,16 @@ def melspectrogram(y):
return _normalize(S)
def find_endpoint(wav, threshold_db=-40, min_silence_sec=0.8):
    '''Finds where the first sustained silence in `wav` begins.

    A window of `hparams.sample_rate * min_silence_sec` samples is slid over
    the waveform in quarter-window hops. The scan stops at the first window
    whose peak magnitude is below the amplitude given by
    `_db_to_amp(threshold_db)`.

    Args:
      wav: Waveform samples; must support `len()` and slicing.
        Presumably a 1-D float array -- confirm against callers.
      threshold_db: Level passed to `_db_to_amp`; windows whose peak magnitude
        is below the resulting amplitude count as silent.
      min_silence_sec: Duration that must stay below the threshold.

    Returns:
      An index suitable for `wav[:index]`: one hop past the start of the first
      silent window, or `len(wav)` when no silent window is found.
    '''
    window_length = int(hparams.sample_rate * min_silence_sec)
    # A window of 1-3 samples would give a zero hop, which range() rejects.
    hop_length = max(1, window_length // 4)
    threshold = _db_to_amp(threshold_db)
    # The scan starts one hop in, so a window starting at sample 0 is never tested.
    for x in range(hop_length, len(wav) - window_length, hop_length):
        # Compare the peak magnitude: a signed max would ignore negative
        # excursions and could label a loud negative-going window as silent.
        if np.max(np.abs(wav[x:x + window_length])) < threshold:
            return x + hop_length
    return len(wav)
def _griffin_lim(S):
'''librosa implementation of Griffin-Lim
Based on https://github.com/librosa/librosa/issues/434
@ -111,7 +121,6 @@ def _stft_parameters():
# Conversions:
# Cache for the mel filterbank from _build_mel_basis(); assigned in _linear_to_mel.
_mel_basis = None
# Cache for the pseudo-inverse of the mel filterbank; built on first use by _mel_to_linear.
_inv_mel_basis = None
def _linear_to_mel(spectrogram):
global _mel_basis
@ -119,12 +128,6 @@ def _linear_to_mel(spectrogram):
_mel_basis = _build_mel_basis()
return np.dot(_mel_basis, spectrogram)
def _mel_to_linear(mel_spectrogram):
    '''Maps a mel spectrogram back through the pseudo-inverse of the mel basis.

    The pseudo-inverse is computed once and kept in the module-level
    `_inv_mel_basis`. The result is floored at 1e-10 element-wise.
    '''
    global _inv_mel_basis
    if _inv_mel_basis is None:
        # First call: build the filterbank and cache its pseudo-inverse.
        mel_basis = _build_mel_basis()
        _inv_mel_basis = np.linalg.pinv(mel_basis)
    projected = np.dot(_inv_mel_basis, mel_spectrogram)
    return np.maximum(1e-10, projected)
def _build_mel_basis():
    '''Builds the mel filterbank matrix from the current hyperparameters.

    The FFT size is derived from `hparams.num_freq` as `(num_freq - 1) * 2`.

    Returns:
      The result of `librosa.filters.mel` for `hparams.sample_rate`, the
      derived FFT size and `hparams.num_mels` mel bands.
    '''
    n_fft = (hparams.num_freq - 1) * 2
    # Pass sr and n_fft by keyword: newer librosa releases make them
    # keyword-only, while older releases accept the same names.
    return librosa.filters.mel(
        sr=hparams.sample_rate, n_fft=n_fft, n_mels=hparams.num_mels)