Issues-1962 - Revert changes to normalize_en

pull/1966/head
Chris Rogers 2019-01-21 18:37:04 -05:00
parent 7fd59bf488
commit 644d75cfc9
1 changed file with 9 additions and 15 deletions

View File

@@ -1137,21 +1137,15 @@ def normalize_en(text, remove_articles):
"you are not", "you are not", "you are", "you have"]
word = expansion[contraction.index(word)]
# Convert numbers into digits, e.g. "two" -> "2"
textNumbers = ["zero", "one", "two", "three", "four", "five", "six",
"seven", "eight", "nine", "ten", "eleven", "twelve",
"thirteen", "fourteen", "fifteen", "sixteen",
"seventeen", "eighteen", "nineteen", "twenty"]
if word in textNumbers:
word = str(textNumbers.index(word))
normalized += " " + word
# replace extracted numbers
numbers = extract_numbers_en(normalized)
# sort by string size, "twenty two" should be replaced before "two"
numbers.sort(key=lambda s: len(pronounce_number_en(s)), reverse=True)
for n in numbers:
txt = pronounce_number_en(n)
n = str(n)
if n.endswith(".0"):
n = n[:-2]
normalized = normalized.replace(txt, n)
# pronounced may be different from txt, i.e.
# pronounce(0.5) != half
# extract(half) == 0.5
# TODO account for this
return normalized[1:] # strip the initial space