Extract fraction and decimal methods.
This is part of a refactor of extractnumber_en, with the ultimate goal of making it easier to maintain and extend (it should also improve performance). This work supports issues-1959. All tests (except extract_duration, which has not yet been implemented) pass at this stage.
pull/1977/head
parent
1a176da6b6
commit
8d588743d0
|
@ -104,6 +104,83 @@ _STRING_NUM_EN.update({
|
||||||
_STRING_SHORT_ORDINAL_EN = _invert_dict(SHORT_ORDINAL_STRING_EN)
|
_STRING_SHORT_ORDINAL_EN = _invert_dict(SHORT_ORDINAL_STRING_EN)
|
||||||
_STRING_LONG_ORDINAL_EN = _invert_dict(LONG_ORDINAL_STRING_EN)
|
_STRING_LONG_ORDINAL_EN = _invert_dict(LONG_ORDINAL_STRING_EN)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_fraction(text):
    """
    Pull a mixed number (whole part plus fraction) out of a string.

    Helper for extractnumber_en; not meant to be called on its own.

    Handles text such as '2 and 3/4'.

    Notes:
        Although this helps extractnumber_en, it also calls
        extractnumber_en to parse the text on either side of the marker.

    Args:
        text str: The text to parse.

    Returns:
        int or float: the sum of the whole part and the fractional part.
        None if no fraction value is found.

    """
    # 2 and 3/4
    for marker in _FRACTION_MARKER:
        pieces = text.split(marker)

        # Only text containing the marker exactly once is considered.
        if len(pieces) != 2:
            continue

        # Both sides are always parsed, left side first.
        whole, part = (extractnumber_en(piece) for piece in pieces)
        if whole is None or part is None:
            continue

        # The left side must not be a fraction (>= 1) and the right side
        # must be a proper fraction (strictly between 0 and 1).
        if whole >= 1 and 0 < part < 1:
            return whole + part

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_decimal(text):
    """
    Pull a spoken decimal number out of a string.

    Helper for extractnumber_en; not meant to be called on its own.

    Handles text such as '2 point 5'.

    Notes:
        Although this helps extractnumber_en, it also calls
        extractnumber_en to parse the text on either side of the marker.

    Args:
        text str: The text to parse.

    Returns:
        float: the number before the marker plus the digits after it,
            read as a decimal fraction.
        None if no decimal value is found.

    """
    # 2 point 5
    for marker in _DECIMAL_MARKER:
        pieces = text.split(marker)

        # Only text containing the marker exactly once is considered.
        if len(pieces) != 2:
            continue

        # Both sides are always parsed, left side first.
        # NOTE(review): assumes extractnumber_en signals "no number" with
        # None -- confirm against its actual return value.
        integral = extractnumber_en(pieces[0])
        fractional = extractnumber_en(pieces[1])
        if integral is None or fractional is None:
            continue

        # TODO handle number dot number number number
        digits = str(fractional)
        if "." in digits:
            # The right-hand side already renders with a decimal point,
            # so it cannot be appended after "0."; try the next marker.
            continue

        return integral + float("0." + digits)

    return None
|
||||||
|
|
||||||
|
|
||||||
def extractnumber_en(text, short_scale=True, ordinals=False):
|
def extractnumber_en(text, short_scale=True, ordinals=False):
|
||||||
"""
|
"""
|
||||||
This function extracts a number from a text string,
|
This function extracts a number from a text string,
|
||||||
|
@ -132,28 +209,13 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
|
||||||
string_num_scale_en = _invert_dict(string_num_scale_en)
|
string_num_scale_en = _invert_dict(string_num_scale_en)
|
||||||
string_num_scale_en.update(_generate_plurals(string_num_scale_en))
|
string_num_scale_en.update(_generate_plurals(string_num_scale_en))
|
||||||
|
|
||||||
# 2 and 3/4
|
fraction = _extract_fraction(text)
|
||||||
for c in _FRACTION_MARKER:
|
if fraction:
|
||||||
components = text.split(c)
|
return fraction
|
||||||
|
|
||||||
if len(components) == 2:
|
decimal = _extract_decimal(text)
|
||||||
# ensure first is not a fraction and second is a fraction
|
if decimal:
|
||||||
num1 = extractnumber_en(components[0])
|
return decimal
|
||||||
num2 = extractnumber_en(components[1])
|
|
||||||
if num1 is not None and num2 is not None \
|
|
||||||
and num1 >= 1 and 0 < num2 < 1:
|
|
||||||
return num1 + num2
|
|
||||||
|
|
||||||
# 2 point 5
|
|
||||||
for c in _DECIMAL_MARKER:
|
|
||||||
components = text.split(c)
|
|
||||||
if len(components) == 2:
|
|
||||||
number = extractnumber_en(components[0])
|
|
||||||
decimal = extractnumber_en(components[1])
|
|
||||||
if number is not None and decimal is not None:
|
|
||||||
# TODO handle number dot number number number
|
|
||||||
if "." not in str(decimal):
|
|
||||||
return number + float("0." + str(decimal))
|
|
||||||
|
|
||||||
aWords = text.split()
|
aWords = text.split()
|
||||||
aWords = [word for word in aWords if word not in ["the", "a", "an"]]
|
aWords = [word for word in aWords if word not in ["the", "a", "an"]]
|
||||||
|
|
Loading…
Reference in New Issue