Reference cleaner docs in TRAINING_DATA.md

pull/2/head
Keith Ito 2017-09-04 21:24:08 -07:00
parent edd0925213
commit c724ac2f4c
2 changed files with 3 additions and 4 deletions

View File

@@ -4,7 +4,7 @@ import tensorflow as tf
# Default hyperparameters:
hparams = tf.contrib.training.HParams(
# Comma-separated list of cleaners to run on text prior to training and eval. For non-English
# text, you may want to use "basic_pipeline" or "transliteration_pipeline" See inputs/cleaners.py.
# text, you may want to use "basic_pipeline" or "transliteration_pipeline". See TRAINING_DATA.md.
cleaners='english_pipeline',
# Audio:

View File

@@ -1,9 +1,8 @@
'''
Defines the set of symbols used in text input to the model.
The default works well for English. For non-English datasets, update _characters to be the set of
characters in the dataset. The "cleaners" hyperparameter should also be changed to be
"basic_pipeline" or a custom set of steps for the dataset (see cleaners.py for more info).
The default is a set of ASCII characters that works well for English or text that has been run
through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
'''
from text import cmudict