Reference cleaner docs in TRAINING_DATA.md

2017-09-04 21:24:08 -07:00 · 2017-09-04 21:24:08 -07:00 · c724ac2f4c
parent edd0925213
commit c724ac2f4c
2 changed files with 3 additions and 4 deletions
--- a/hparams.py
+++ b/hparams.py
@@ -4,7 +4,7 @@ import tensorflow as tf
 # Default hyperparameters:
 hparams = tf.contrib.training.HParams(
  # Comma-separated list of cleaners to run on text prior to training and eval. For non-English
-  # text, you may want to use "basic_pipeline" or "transliteration_pipeline" See inputs/cleaners.py.
+  # text, you may want to use "basic_pipeline" or "transliteration_pipeline" See TRAINING_DATA.md.
  cleaners='english_pipeline',

  # Audio:
--- a/text/symbols.py
+++ b/text/symbols.py
@@ -1,9 +1,8 @@
 '''
 Defines the set of symbols used in text input to the model.

-The default works well for English. For non-English datasets, update _characters to be the set of
-characters in the dataset. The "cleaners" hyperparameter should also be changed to be
-"basic_pipeline" or a custom set of steps for the dataset (see cleaners.py for more info).
+The default is a set of ASCII characters that works well for English or text that has been run
+through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
 '''
 from text import cmudict