mirror of https://github.com/MycroftAI/mimic2.git
parent 9e1ea7a879 · commit c4e14ad3b9
@@ -50,21 +50,21 @@ following the example of the other preprocessors in that file.
 ### Non-English Data
 
 If your training data is in a language other than English, you will probably want to change the
-text cleaning pipeline by setting the `cleaners` hyperparameter.
+text cleaners by setting the `cleaners` hyperparameter.
 
 * If your text is in a Latin script or can be transliterated to ASCII using the
   [Unidecode](https://pypi.python.org/pypi/Unidecode) library, you can use the transliteration
-  pipeline by setting the hyperparameter `cleaners=transliteration_pipeline`.
+  cleaners by setting the hyperparameter `cleaners=transliteration_cleaners`.
 
 * If you don't want to transliterate, you can define a custom character set.
   This allows you to train directly on the character set used in your data.
 
   To do so, edit [symbols.py](text/symbols.py) and change the `_characters` variable to be a
-  string containing the UTF-8 characters in your data. Then set the hyperparameter `cleaners=basic_pipeline`.
+  string containing the UTF-8 characters in your data. Then set the hyperparameter `cleaners=basic_cleaners`.
 
-* If you're not sure which option to use, you can evaluate the transliteration pipeline like so:
+* If you're not sure which option to use, you can evaluate the transliteration cleaners like this:
 
   ```python
   from text import cleaners
-  cleaners.transliteration_pipeline('Здравствуйте')   # Replace with the text you want to try
+  cleaners.transliteration_cleaners('Здравствуйте')   # Replace with the text you want to try
   ```
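To decide between the two non-English options, it can help to run both cleaners on a line of your own data and compare the output; a minimal sketch (the sample string is only a placeholder):

```python
from text import cleaners

sample = 'Здравствуйте'  # replace with a line from your own training data
print(cleaners.transliteration_cleaners(sample))  # ASCII transliteration via Unidecode, lowercased
print(cleaners.basic_cleaners(sample))            # original characters kept, lowercased only
```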
@@ -4,8 +4,8 @@ import tensorflow as tf
 # Default hyperparameters:
 hparams = tf.contrib.training.HParams(
   # Comma-separated list of cleaners to run on text prior to training and eval. For non-English
-  # text, you may want to use "basic_pipeline" or "transliteration_pipeline" See TRAINING_DATA.md.
-  cleaners='english_pipeline',
+  # text, you may want to use "basic_cleaners" or "transliteration_cleaners" See TRAINING_DATA.md.
+  cleaners='english_cleaners',
 
   # Audio:
   num_mels=80,
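For a non-English run, the default above would typically be overridden rather than edited in place; a hedged sketch using the standard `HParams.parse` API from `tf.contrib.training` (how the repository's training script actually exposes overrides is not shown in this diff):

```python
import tensorflow as tf

hparams = tf.contrib.training.HParams(
    cleaners='english_cleaners',  # default, as in hparams.py
    num_mels=80,
)

# Override via a comma-separated name=value string, e.g. passed in from a command-line flag:
hparams.parse('cleaners=transliteration_cleaners')
print(hparams.cleaners)  # -> transliteration_cleaners
```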
@@ -14,7 +14,7 @@ def test_text_to_sequence():
   assert text_to_sequence('"A"_B', []) == [2, 3, 1]
   assert text_to_sequence('A {AW1 S} B', []) == [2, 64, 83, 132, 64, 3, 1]
   assert text_to_sequence('Hi', ['lowercase']) == [35, 36, 1]
-  assert text_to_sequence('A {AW1 S} B', ['english_pipeline']) == [28, 64, 83, 132, 64, 29, 1]
+  assert text_to_sequence('A {AW1 S} B', ['english_cleaners']) == [28, 64, 83, 132, 64, 29, 1]
 
 
 def test_sequence_to_text():
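As the updated assertion shows, cleaner names are passed to `text_to_sequence` as a list; a small usage sketch (the `from text import ...` path is an assumption about the package layout, and no symbol IDs are asserted since they depend on the configured symbol set):

```python
from text import sequence_to_text, text_to_sequence  # assumed package layout

# Curly braces mark ARPAbet phoneme input; 'english_cleaners' names a function in text/cleaners.py.
seq = text_to_sequence('A {AW1 S} B', ['english_cleaners'])
print(seq)                    # list of integer symbol IDs, ending with the end-of-sequence token
print(sequence_to_text(seq))  # approximate round trip back to a readable string
```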
@@ -52,9 +52,9 @@ def test_expand_numbers():
   assert cleaners.expand_numbers('$3.50 for gas.') == 'three dollars, fifty cents for gas.'
 
 
-def test_pipelines():
+def test_cleaner_pipelines():
   text = 'Mr. Müller ate 2 Apples'
-  assert cleaners.english_pipeline(text) == 'mister muller ate two apples'
-  assert cleaners.transliteration_pipeline(text) == 'mr. muller ate 2 apples'
-  assert cleaners.basic_pipeline(text) == 'mr. müller ate 2 apples'
+  assert cleaners.english_cleaners(text) == 'mister muller ate two apples'
+  assert cleaners.transliteration_cleaners(text) == 'mr. muller ate 2 apples'
+  assert cleaners.basic_cleaners(text) == 'mr. müller ate 2 apples'
@@ -3,10 +3,10 @@ Cleaners are transformations that run over the input text at both training and eval time.
 
 Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
 hyperparameter. Some cleaners are English-specific. You'll typically want to use:
-  1. "english_pipeline" for English text
-  2. "transliteration_pipeline" for non-English text that can be transliterated to ASCII using
+  1. "english_cleaners" for English text
+  2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
      the Unidecode library (https://pypi.python.org/pypi/Unidecode)
-  3. "basic_pipeline" if you do not want to transliterate (in this case, you should also update
+  3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
      the symbols in symbols.py to match your data).
 '''
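Because the hyperparameter is a comma-delimited list of names, individual transformations from this module can also be combined without defining a new pipeline function; a sketch based on the `['lowercase']` usage in the tests (treating `collapse_whitespace` as selectable in the same way is an assumption):

```python
# e.g. in hparams.py: cleaners='lowercase,collapse_whitespace'
from text import text_to_sequence  # assumed package layout

seq = text_to_sequence('Hello   World', ['lowercase', 'collapse_whitespace'])
```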
@@ -63,14 +63,14 @@ def convert_to_ascii(text):
   return unidecode(text)
 
 
-def basic_pipeline(text):
+def basic_cleaners(text):
   '''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
   text = lowercase(text)
   text = collapse_whitespace(text)
   return text
 
 
-def transliteration_pipeline(text):
+def transliteration_cleaners(text):
   '''Pipeline for non-English text that transliterates to ASCII.'''
   text = convert_to_ascii(text)
   text = lowercase(text)
@@ -78,7 +78,7 @@ def transliteration_pipeline(text):
   return text
 
 
-def english_pipeline(text):
+def english_cleaners(text):
   '''Pipeline for English text, including number and abbreviation expansion.'''
   text = convert_to_ascii(text)
   text = lowercase(text)
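A project-specific pipeline can follow the same shape as the functions above, reusing helpers already defined in this module; a hypothetical sketch (`ascii_numbers_cleaners` is not part of the repository):

```python
def ascii_numbers_cleaners(text):
  '''Hypothetical pipeline: transliterate, lowercase, and expand numbers,
  but skip the English abbreviation expansion.'''
  text = convert_to_ascii(text)
  text = lowercase(text)
  text = expand_numbers(text)
  text = collapse_whitespace(text)
  return text
```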