# Mirror of https://github.com/MycroftAI/mimic2.git
import numpy as np
import os
import random
import tensorflow as tf
import threading
import time
import traceback
from text import cmudict, text_to_sequence
from util.infolog import log
# Number of batches' worth of examples that are read, sorted by length and
# enqueued together as one group.
_batches_per_group = 32
# Probability that an example's text goes through the per-word CMUDict
# (ARPABet) substitution pass.
_p_cmudict = 0.5
# Constant value used when padding input sequences and target frames.
_pad = 0
class DataFeeder(threading.Thread):
    '''Feeds batches of data into a queue on a background thread.'''

    def __init__(self, coordinator, metadata_filename, hparams):
        '''Loads the metadata and builds the placeholders and input queue.

        Args:
          coordinator: object providing `should_stop()` and `request_stop(e)`;
            presumably a `tf.train.Coordinator` (confirm against the caller).
          metadata_filename: path to a UTF-8 text file with one example per line
            and '|'-separated fields. Field 0 and 1 are file names (relative to
            the metadata file's directory) of the linear and mel targets, field 2
            is an integer frame count and field 3 is the text.
          hparams: hyperparameter object. This class reads `cleaners`,
            `frame_shift_ms`, `num_mels`, `num_freq`, `use_cmudict`, `batch_size`
            and `outputs_per_step` from it.

        Raises:
          Exception: if `hparams.use_cmudict` is set but the dictionary file is
            not present next to the metadata file.
        '''
        super(DataFeeder, self).__init__()
        self._coord = coordinator
        self._hparams = hparams
        self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
        self._offset = 0

        # Load metadata (blank lines are skipped so they cannot break the
        # field indexing below):
        self._datadir = os.path.dirname(metadata_filename)
        with open(metadata_filename, encoding='utf-8') as f:
            self._metadata = [line.strip().split('|') for line in f if line.strip()]
        hours = sum((int(x[2]) for x in self._metadata)) * hparams.frame_shift_ms / (3600 * 1000)
        log('Loaded metadata for %d examples (%.2f hours)' % (len(self._metadata), hours))

        # Create placeholders for inputs and targets. Don't specify batch size because we want to
        # be able to feed different sized batches at eval time.
        self._placeholders = [
            tf.placeholder(tf.int32, [None, None], 'inputs'),
            tf.placeholder(tf.int32, [None], 'input_lengths'),
            tf.placeholder(tf.float32, [None, None, hparams.num_mels], 'mel_targets'),
            tf.placeholder(tf.float32, [None, None, hparams.num_freq], 'linear_targets')
        ]

        # Create queue for buffering data:
        queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.float32], name='input_queue')
        self._enqueue_op = queue.enqueue(self._placeholders)
        self.inputs, self.input_lengths, self.mel_targets, self.linear_targets = queue.dequeue()

        # Load CMUDict: If enabled, this will randomly substitute some words in the training data with
        # their ARPABet equivalents, which will allow you to also pass ARPABet to the model for
        # synthesis (useful for proper nouns, etc.)
        if hparams.use_cmudict:
            cmudict_path = os.path.join(self._datadir, 'cmudict-0.7b')
            if not os.path.isfile(cmudict_path):
                raise Exception('If use_cmudict=True, you must download ' +
                    'http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b to %s' % cmudict_path)
            self._cmudict = cmudict.CMUDict(cmudict_path, keep_ambiguous=False)
            log('Loaded CMUDict with %d unambiguous entries' % len(self._cmudict))
        else:
            # Only clear the dictionary when it is disabled; assigning None
            # unconditionally would throw away the dictionary loaded above.
            self._cmudict = None

    def start_in_session(self, session):
        '''Stores `session` for the enqueue calls and starts the feeder thread.'''
        self._session = session
        # NOTE(review): starting the thread here follows the method name and the
        # class docstring; callers must not call start() a second time.
        self.start()

    def run(self):
        '''Thread body: enqueues groups of batches until the coordinator stops.

        Any exception is printed and handed to the coordinator through
        `request_stop` instead of silently killing the thread.
        '''
        try:
            while not self._coord.should_stop():
                self._enqueue_next_group()
        except Exception as e:
            traceback.print_exc()
            self._coord.request_stop(e)

    def _enqueue_next_group(self):
        '''Reads one group of examples, batches them by length and enqueues them.'''
        start = time.time()

        # Read a group of examples:
        n = self._hparams.batch_size
        r = self._hparams.outputs_per_step
        examples = [self._get_next_example() for i in range(n * _batches_per_group)]

        # Bucket examples based on similar output sequence length for efficiency:
        # the last tuple element is the length of the linear target.
        examples.sort(key=lambda x: x[-1])
        batches = [examples[i:i+n] for i in range(0, len(examples), n)]

        log('Generated %d batches of size %d in %.03f sec' % (len(batches), n, time.time() - start))
        for batch in batches:
            feed_dict = dict(zip(self._placeholders, _prepare_batch(batch, r)))
            self._session.run(self._enqueue_op, feed_dict=feed_dict)

    def _get_next_example(self):
        '''Loads a single example (input, mel_target, linear_target, cost) from disk'''
        # Wrap around to the first example once the metadata is exhausted.
        if self._offset >= len(self._metadata):
            self._offset = 0
        meta = self._metadata[self._offset]
        self._offset += 1

        text = meta[3]
        if self._cmudict and random.random() < _p_cmudict:
            text = ' '.join([self._maybe_get_arpabet(word) for word in text.split(' ')])

        input_data = np.asarray(text_to_sequence(text, self._cleaner_names), dtype=np.int32)
        linear_target = np.load(os.path.join(self._datadir, meta[0]))
        mel_target = np.load(os.path.join(self._datadir, meta[1]))
        return (input_data, mel_target, linear_target, len(linear_target))

    def _maybe_get_arpabet(self, word):
        '''Returns `word`, or with probability 0.5 its first dictionary entry in braces.

        Words for which the dictionary lookup returns None are always returned
        unchanged.
        '''
        arpabet = self._cmudict.lookup(word)
        return '{%s}' % arpabet[0] if arpabet is not None and random.random() < 0.5 else word
def _prepare_batch(batch, outputs_per_step):
    '''Pads a list of examples into the four arrays fed to the queue.

    Each example is indexed as (input, mel_target, linear_target, ...). Returns
    a tuple (inputs, input_lengths, mel_targets, linear_targets); the target
    lengths are padded to a multiple of `outputs_per_step`.
    '''
    token_seqs = [example[0] for example in batch]
    mels = [example[1] for example in batch]
    linears = [example[2] for example in batch]
    return (
        _prepare_inputs(token_seqs),
        np.asarray([len(seq) for seq in token_seqs], dtype=np.int32),
        _prepare_targets(mels, outputs_per_step),
        _prepare_targets(linears, outputs_per_step),
    )
def _prepare_inputs(inputs):
    '''Pads every input sequence to the longest one and stacks them into one array.'''
    longest = max(map(len, inputs))
    padded = [_pad_input(seq, longest) for seq in inputs]
    return np.stack(padded)
def _prepare_targets(targets, alignment):
    '''Pads targets to a common length and stacks them into one array.

    The common length is the longest target plus one frame, rounded up to a
    multiple of `alignment`.
    '''
    longest = max(map(len, targets))
    padded_len = _round_up(longest + 1, alignment)
    return np.stack([_pad_target(frames, padded_len) for frames in targets])
def _pad_input(x, length):
    '''Right-pads the 1-D array `x` with `_pad` up to `length` elements.'''
    trailing = length - x.shape[0]
    return np.pad(x, (0, trailing), mode='constant', constant_values=_pad)
def _pad_target(t, length):
    '''Pads the 2-D array `t` with `_pad` rows at the end, up to `length` rows.

    The second axis is left unpadded.
    '''
    extra_rows = length - t.shape[0]
    pad_widths = [(0, extra_rows), (0, 0)]
    return np.pad(t, pad_widths, mode='constant', constant_values=_pad)
def _round_up(x, multiple):
    '''Returns the smallest multiple of `multiple` that is not less than `x`.'''
    excess = x % multiple
    if excess == 0:
        return x
    return x + multiple - excess