Issues-1962 - Revert changes to normalize_en
parent
7fd59bf488
commit
644d75cfc9
|
@ -1137,21 +1137,15 @@ def normalize_en(text, remove_articles):
|
||||||
"you are not", "you are not", "you are", "you have"]
|
"you are not", "you are not", "you are", "you have"]
|
||||||
word = expansion[contraction.index(word)]
|
word = expansion[contraction.index(word)]
|
||||||
|
|
||||||
|
# Convert numbers into digits, e.g. "two" -> "2"
|
||||||
|
textNumbers = ["zero", "one", "two", "three", "four", "five", "six",
|
||||||
|
"seven", "eight", "nine", "ten", "eleven", "twelve",
|
||||||
|
"thirteen", "fourteen", "fifteen", "sixteen",
|
||||||
|
"seventeen", "eighteen", "nineteen", "twenty"]
|
||||||
|
|
||||||
|
if word in textNumbers:
|
||||||
|
word = str(textNumbers.index(word))
|
||||||
|
|
||||||
normalized += " " + word
|
normalized += " " + word
|
||||||
|
|
||||||
# replace extracted numbers
|
|
||||||
numbers = extract_numbers_en(normalized)
|
|
||||||
# sort by string size, "twenty two" should be replaced before "two"
|
|
||||||
numbers.sort(key=lambda s: len(pronounce_number_en(s)), reverse=True)
|
|
||||||
for n in numbers:
|
|
||||||
txt = pronounce_number_en(n)
|
|
||||||
n = str(n)
|
|
||||||
if n.endswith(".0"):
|
|
||||||
n = n[:-2]
|
|
||||||
normalized = normalized.replace(txt, n)
|
|
||||||
# pronounced may be different from txt, i.e.
|
|
||||||
# pronounce(0.5) != half
|
|
||||||
# extract(half) == 0.5
|
|
||||||
# TODO account for this
|
|
||||||
|
|
||||||
return normalized[1:] # strip the initial space
|
return normalized[1:] # strip the initial space
|
||||||
|
|
Loading…
Reference in New Issue