Extract fraction and decimal methods.

This is part of a refactor of extractnumber_en, with the ultimate
goal of making it easier to maintain and extend (should also
improve perf).  This is in support of issues-1959.

All tests (minus extract_duration, which has not yet been implemented)
are passing at this stage.
pull/1977/head
Chris Rogers 2019-01-25 21:02:34 -05:00
parent 1a176da6b6
commit 8d588743d0
1 changed files with 83 additions and 21 deletions

View File

@ -104,6 +104,83 @@ _STRING_NUM_EN.update({
# Lookup tables for ordinals, derived by passing the SHORT_/LONG_ORDINAL_STRING_EN
# tables through _invert_dict.
# NOTE(review): presumably this swaps keys and values so ordinal words map
# back to their numbers -- confirm against _invert_dict's definition.
_STRING_SHORT_ORDINAL_EN = _invert_dict(SHORT_ORDINAL_STRING_EN)
_STRING_LONG_ORDINAL_EN = _invert_dict(LONG_ORDINAL_STRING_EN)
def _extract_fraction(text):
    """
    Extract a mixed number (whole part plus fraction) from a string.

    Helper for extractnumber_en; it is not intended to be used on its
    own. Handles text such as '2 and 3/4'.

    Notes:
        Although this is a helper for extractnumber_en, it calls back
        into extractnumber_en to parse each side of the fraction marker.

    Args:
        text str: The text to parse.

    Returns:
        int or float: the whole part plus the fractional part.
        None if no fraction value is found.

    """
    # 2 and 3/4
    for marker in _FRACTION_MARKER:
        pieces = text.split(marker)
        # Only an unambiguous "<left> marker <right>" shape is considered.
        # NOTE(review): split() matches the marker as a plain substring, so
        # a marker occurring inside another word also counts -- confirm this
        # is acceptable for the values in _FRACTION_MARKER.
        if len(pieces) != 2:
            continue

        left, right = pieces
        whole = extractnumber_en(left)
        part = extractnumber_en(right)
        if whole is None or part is None:
            continue

        # The left side must not itself be a fraction, and the right side
        # must be a proper fraction strictly between 0 and 1.
        if whole >= 1 and 0 < part < 1:
            return whole + part

    return None
def _extract_decimal(text):
    """
    Extract decimal numbers from a string.

    This is a helper function for extractnumber_en. It is not intended
    to be used on its own.

    This function handles text such as '2 point 5'.

    Notes:
        While this is a helper for extractnumber_en, it also depends on
        extractnumber_en, to parse out the components of the decimal.

    Args:
        text str: The text to parse.

    Returns:
        float: the number before the marker plus the digits after it,
            interpreted as the fractional part.
        None if no decimal value is found.

    """
    # 2 point 5
    for c in _DECIMAL_MARKER:
        components = text.split(c)
        if len(components) != 2:
            continue

        number = extractnumber_en(components[0])
        decimal = extractnumber_en(components[1])
        if number is None or decimal is None:
            continue

        # TODO handle number dot number number number
        # The part after the marker is glued onto "0." and re-parsed, so it
        # must render as a plain run of digits. Anything else (a float such
        # as "0.5", a negative such as "-5", exponent notation such as
        # "1e+20") would either make float() raise ValueError or produce a
        # meaningless value, so such candidates are skipped.
        digits = str(decimal)
        if digits.isdecimal():
            return number + float("0." + digits)

    return None
def extractnumber_en(text, short_scale=True, ordinals=False):
"""
This function extracts a number from a text string,
@ -132,28 +209,13 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
string_num_scale_en = _invert_dict(string_num_scale_en)
string_num_scale_en.update(_generate_plurals(string_num_scale_en))
# 2 and 3/4
for c in _FRACTION_MARKER:
components = text.split(c)
fraction = _extract_fraction(text)
if fraction:
return fraction
if len(components) == 2:
# ensure first is not a fraction and second is a fraction
num1 = extractnumber_en(components[0])
num2 = extractnumber_en(components[1])
if num1 is not None and num2 is not None \
and num1 >= 1 and 0 < num2 < 1:
return num1 + num2
# 2 point 5
for c in _DECIMAL_MARKER:
components = text.split(c)
if len(components) == 2:
number = extractnumber_en(components[0])
decimal = extractnumber_en(components[1])
if number is not None and decimal is not None:
# TODO handle number dot number number number
if "." not in str(decimal):
return number + float("0." + str(decimal))
decimal = _extract_decimal(text)
if decimal:
return decimal
aWords = text.split()
aWords = [word for word in aWords if word not in ["the", "a", "an"]]