Issues-1962 - Revert changes to normalize_en
parent
7fd59bf488
commit
644d75cfc9
|
@ -1137,21 +1137,15 @@ def normalize_en(text, remove_articles):
|
|||
"you are not", "you are not", "you are", "you have"]
|
||||
word = expansion[contraction.index(word)]
|
||||
|
||||
# Convert numbers into digits, e.g. "two" -> "2"
|
||||
textNumbers = ["zero", "one", "two", "three", "four", "five", "six",
|
||||
"seven", "eight", "nine", "ten", "eleven", "twelve",
|
||||
"thirteen", "fourteen", "fifteen", "sixteen",
|
||||
"seventeen", "eighteen", "nineteen", "twenty"]
|
||||
|
||||
if word in textNumbers:
|
||||
word = str(textNumbers.index(word))
|
||||
|
||||
normalized += " " + word
|
||||
|
||||
# replace extracted numbers
|
||||
numbers = extract_numbers_en(normalized)
|
||||
# sort by string size, "twenty two" should be replaced before "two"
|
||||
numbers.sort(key=lambda s: len(pronounce_number_en(s)), reverse=True)
|
||||
for n in numbers:
|
||||
txt = pronounce_number_en(n)
|
||||
n = str(n)
|
||||
if n.endswith(".0"):
|
||||
n = n[:-2]
|
||||
normalized = normalized.replace(txt, n)
|
||||
# pronounced may be different from txt, i.e.
|
||||
# pronounce(0.5) != half
|
||||
# extract(half) == 0.5
|
||||
# TODO account for this
|
||||
|
||||
return normalized[1:] # strip the initial space
|
||||
|
|
Loading…
Reference in New Issue