All regressions in number parsing fixed.
This is in support of issues-1959.
pull/1977/head
parent
6da1ec5c6e
commit
534ca2aff9
|
@ -377,6 +377,7 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
|
||||||
next_val = None
|
next_val = None
|
||||||
to_sum = []
|
to_sum = []
|
||||||
for idx, token in enumerate(tokens):
|
for idx, token in enumerate(tokens):
|
||||||
|
current_val = None
|
||||||
if next_val:
|
if next_val:
|
||||||
next_val = None
|
next_val = None
|
||||||
continue
|
continue
|
||||||
|
@ -422,14 +423,18 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
|
||||||
val = int(word)
|
val = int(word)
|
||||||
else:
|
else:
|
||||||
val = float(word)
|
val = float(word)
|
||||||
|
current_val = val
|
||||||
|
|
||||||
# is this word the name of a number ?
|
# is this word the name of a number ?
|
||||||
if word in _STRING_NUM_EN:
|
if word in _STRING_NUM_EN:
|
||||||
val = _STRING_NUM_EN.get(word)
|
val = _STRING_NUM_EN.get(word)
|
||||||
|
current_val = val
|
||||||
elif word in string_num_scale:
|
elif word in string_num_scale:
|
||||||
val = string_num_scale.get(word)
|
val = string_num_scale.get(word)
|
||||||
|
current_val = val
|
||||||
elif ordinals and word in string_num_ordinal:
|
elif ordinals and word in string_num_ordinal:
|
||||||
val = string_num_ordinal[word]
|
val = string_num_ordinal[word]
|
||||||
|
current_val = val
|
||||||
|
|
||||||
# is the prev word an ordinal number and current word is one?
|
# is the prev word an ordinal number and current word is one?
|
||||||
# second one, third one
|
# second one, third one
|
||||||
|
@ -438,11 +443,7 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
|
||||||
|
|
||||||
# is the prev word a number and should we sum it?
|
# is the prev word a number and should we sum it?
|
||||||
# twenty two, fifty six
|
# twenty two, fifty six
|
||||||
if prev_word in _SUMS and (
|
if prev_word in _SUMS and val and val < 10:
|
||||||
word in _STRING_NUM_EN or
|
|
||||||
word in string_num_scale or
|
|
||||||
(ordinals and word in string_num_ordinal)):
|
|
||||||
if val and val < 10:
|
|
||||||
val = prev_val + val
|
val = prev_val + val
|
||||||
|
|
||||||
# is the prev word a number and should we multiply it?
|
# is the prev word a number and should we multiply it?
|
||||||
|
@ -456,6 +457,7 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
|
||||||
# half cup
|
# half cup
|
||||||
if val is False:
|
if val is False:
|
||||||
val = isFractional_en(word, short_scale=short_scale)
|
val = isFractional_en(word, short_scale=short_scale)
|
||||||
|
current_val = val
|
||||||
|
|
||||||
# 2 fifths
|
# 2 fifths
|
||||||
if not ordinals:
|
if not ordinals:
|
||||||
|
@ -476,8 +478,14 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
|
||||||
aPieces = word.split('/')
|
aPieces = word.split('/')
|
||||||
if look_for_fractions(aPieces):
|
if look_for_fractions(aPieces):
|
||||||
val = float(aPieces[0]) / float(aPieces[1])
|
val = float(aPieces[0]) / float(aPieces[1])
|
||||||
|
current_val = val
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
if prev_word in _SUMS and word not in _SUMS and current_val >= 10:
|
||||||
|
# Backtrack - we've got numbers we can't sum.
|
||||||
|
number_words.pop()
|
||||||
|
val = prev_val
|
||||||
|
break
|
||||||
prev_val = val
|
prev_val = val
|
||||||
|
|
||||||
# handle long numbers
|
# handle long numbers
|
||||||
|
|
|
@ -534,12 +534,14 @@ class TestNormalize(unittest.TestCase):
|
||||||
[2.0, 2.0])
|
[2.0, 2.0])
|
||||||
self.assertEqual(extract_numbers("twenty 20 twenty"),
|
self.assertEqual(extract_numbers("twenty 20 twenty"),
|
||||||
[20, 20, 20])
|
[20, 20, 20])
|
||||||
# self.assertEqual(extract_numbers("twenty 20 22"),
|
self.assertEqual(extract_numbers("twenty 20 22"),
|
||||||
# [20, 20, 22])
|
[20.0, 20.0, 22.0])
|
||||||
# self.assertEqual(extract_numbers("twenty twenty two twenty"),
|
self.assertEqual(extract_numbers("twenty twenty two twenty"),
|
||||||
# [20, 22, 20])
|
[20, 22, 20])
|
||||||
# self.assertEqual(extract_numbers("twenty 20 twenty 2"),
|
self.assertEqual(extract_numbers("twenty 2"),
|
||||||
# [20, 20, 20, 2])
|
[22.0])
|
||||||
|
self.assertEqual(extract_numbers("twenty 20 twenty 2"),
|
||||||
|
[20, 20, 22])
|
||||||
self.assertEqual(extract_numbers("third one"),
|
self.assertEqual(extract_numbers("third one"),
|
||||||
[1 / 3, 1])
|
[1 / 3, 1])
|
||||||
self.assertEqual(extract_numbers("third one", ordinals=True), [3])
|
self.assertEqual(extract_numbers("third one", ordinals=True), [3])
|
||||||
|
|
Loading…
Reference in New Issue