Issue-1877 - fix ordinal followed by one
"Third one" will now return the expected 3 instead of 1.
pull/1879/head
parent
3e6dedbbcc
commit
32a3860bce
|
@ -138,6 +138,8 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
|
||||||
"million": 1000000,
|
"million": 1000000,
|
||||||
'millions': 1000000}
|
'millions': 1000000}
|
||||||
|
|
||||||
|
string_num_ordinal_en = {}
|
||||||
|
|
||||||
for num in NUM_STRING_EN:
|
for num in NUM_STRING_EN:
|
||||||
num_string = NUM_STRING_EN[num]
|
num_string = NUM_STRING_EN[num]
|
||||||
string_num_en[num_string] = num
|
string_num_en[num_string] = num
|
||||||
|
@ -147,10 +149,12 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
|
||||||
if short_scale:
|
if short_scale:
|
||||||
for num in SHORT_ORDINAL_STRING_EN:
|
for num in SHORT_ORDINAL_STRING_EN:
|
||||||
num_string = SHORT_ORDINAL_STRING_EN[num]
|
num_string = SHORT_ORDINAL_STRING_EN[num]
|
||||||
|
string_num_ordinal_en[num_string] = num
|
||||||
string_num_en[num_string] = num
|
string_num_en[num_string] = num
|
||||||
else:
|
else:
|
||||||
for num in LONG_ORDINAL_STRING_EN:
|
for num in LONG_ORDINAL_STRING_EN:
|
||||||
num_string = LONG_ORDINAL_STRING_EN[num]
|
num_string = LONG_ORDINAL_STRING_EN[num]
|
||||||
|
string_num_ordinal_en[num_string] = num
|
||||||
string_num_en[num_string] = num
|
string_num_en[num_string] = num
|
||||||
|
|
||||||
# negate next number (-2 = 0 - 2)
|
# negate next number (-2 = 0 - 2)
|
||||||
|
@ -229,6 +233,11 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
|
||||||
if word in string_num_en:
|
if word in string_num_en:
|
||||||
val = string_num_en[word]
|
val = string_num_en[word]
|
||||||
|
|
||||||
|
# is the prev word an ordinal number and current word is one?
|
||||||
|
# second one, third one
|
||||||
|
if ordinals and prev_word in string_num_ordinal_en and val is 1:
|
||||||
|
val = prev_val
|
||||||
|
|
||||||
# is the prev word a number and should we sum it?
|
# is the prev word a number and should we sum it?
|
||||||
# twenty two, fifty six
|
# twenty two, fifty six
|
||||||
if prev_word in sums and word in string_num_en:
|
if prev_word in sums and word in string_num_en:
|
||||||
|
@ -268,6 +277,7 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
|
||||||
|
|
||||||
else:
|
else:
|
||||||
prev_val = val
|
prev_val = val
|
||||||
|
|
||||||
# handle long numbers
|
# handle long numbers
|
||||||
# six hundred sixty six
|
# six hundred sixty six
|
||||||
# two million five hundred thousand
|
# two million five hundred thousand
|
||||||
|
|
|
@ -68,6 +68,9 @@ class TestNormalize(unittest.TestCase):
|
||||||
self.assertEqual(extract_number("this is the third test"), 1.0 / 3.0)
|
self.assertEqual(extract_number("this is the third test"), 1.0 / 3.0)
|
||||||
self.assertEqual(extract_number("this is the third test",
|
self.assertEqual(extract_number("this is the third test",
|
||||||
ordinals=True), 3.0)
|
ordinals=True), 3.0)
|
||||||
|
self.assertEqual(extract_number("the fourth one", ordinals=True), 4.0)
|
||||||
|
self.assertEqual(extract_number("the thirty sixth one",
|
||||||
|
ordinals=True), 36.0)
|
||||||
self.assertEqual(extract_number("this is test number 4"), 4)
|
self.assertEqual(extract_number("this is test number 4"), 4)
|
||||||
self.assertEqual(extract_number("one third of a cup"), 1.0 / 3.0)
|
self.assertEqual(extract_number("one third of a cup"), 1.0 / 3.0)
|
||||||
self.assertEqual(extract_number("three cups"), 3)
|
self.assertEqual(extract_number("three cups"), 3)
|
||||||
|
|
Loading…
Reference in New Issue