Fix pep8 issues.

This is in support of issues-1959.
pull/1977/head
Chris Rogers 2019-02-02 14:03:19 -05:00
parent 7049e65cbe
commit 351381bca2
2 changed files with 69 additions and 35 deletions

View File

@ -161,9 +161,8 @@ class _ReplaceableNumber():
return "({v}, {t})".format(v=self.value, t=self.tokens) return "({v}, {t})".format(v=self.value, t=self.tokens)
def __repr__(self): def __repr__(self):
return "{n}({v}, {t})".format(n=self.__class__.__name__, return "{n}({v}, {t})".format(n=self.__class__.__name__, v=self.value,
v=self.value, t=self.tokens)
t=self.tokens)
def _tokenize(text): def _tokenize(text):
@ -225,23 +224,28 @@ def convert_words_to_numbers(text, short_scale=True, ordinals=False):
""" """
text = text.lower() text = text.lower()
tokens = _tokenize(text) tokens = _tokenize(text)
numbers_to_replace = _extract_numbers_with_text(tokens, short_scale, ordinals) numbers_to_replace = \
_extract_numbers_with_text(tokens, short_scale, ordinals)
numbers_to_replace.sort(key=lambda number: number.start_index) numbers_to_replace.sort(key=lambda number: number.start_index)
results = [] results = []
for token in tokens: for token in tokens:
if not numbers_to_replace or token.index < numbers_to_replace[0].start_index: if not numbers_to_replace or \
token.index < numbers_to_replace[0].start_index:
results.append(token.word) results.append(token.word)
else: else:
if numbers_to_replace and token.index == numbers_to_replace[0].start_index: if numbers_to_replace and \
token.index == numbers_to_replace[0].start_index:
results.append(str(numbers_to_replace[0].value)) results.append(str(numbers_to_replace[0].value))
if numbers_to_replace and token.index == numbers_to_replace[0].end_index: if numbers_to_replace and \
token.index == numbers_to_replace[0].end_index:
numbers_to_replace.pop(0) numbers_to_replace.pop(0)
return ' '.join(results) return ' '.join(results)
def _extract_numbers_with_text(tokens, short_scale=True, ordinals=False, fractional_numbers=True): def _extract_numbers_with_text(tokens, short_scale=True,
ordinals=False, fractional_numbers=True):
""" """
Extract all numbers from a list of _Tokens, with the words that Extract all numbers from a list of _Tokens, with the words that
represent them. represent them.
@ -264,20 +268,26 @@ def _extract_numbers_with_text(tokens, short_scale=True, ordinals=False, fractio
results = [] results = []
while True: while True:
to_replace = \ to_replace = \
_extract_number_with_text_en(tokens, short_scale, ordinals, fractional_numbers) _extract_number_with_text_en(tokens, short_scale,
ordinals, fractional_numbers)
if not to_replace: if not to_replace:
break break
results.append(to_replace) results.append(to_replace)
tokens = [t if not to_replace.start_index <= t.index <= to_replace.end_index else \ tokens = [
_Token(placeholder, t.index) for t in tokens] t if not
to_replace.start_index <= t.index <= to_replace.end_index
else
_Token(placeholder, t.index) for t in tokens
]
results.sort(key=lambda n: n.start_index) results.sort(key=lambda n: n.start_index)
return results return results
def _extract_number_with_text_en(tokens, short_scale=True, ordinals=False, fractional_numbers=True): def _extract_number_with_text_en(tokens, short_scale=True,
ordinals=False, fractional_numbers=True):
""" """
This function extracts a number from a list of _Tokens. This function extracts a number from a list of _Tokens.
@ -291,13 +301,17 @@ def _extract_number_with_text_en(tokens, short_scale=True, ordinals=False, fract
_ReplaceableNumber _ReplaceableNumber
""" """
number, tokens = _extract_number_with_text_en_helper(tokens, short_scale, ordinals, fractional_numbers) number, tokens = \
_extract_number_with_text_en_helper(tokens, short_scale,
ordinals, fractional_numbers)
while tokens and tokens[0].word in ARTICLES: while tokens and tokens[0].word in ARTICLES:
tokens.pop(0) tokens.pop(0)
return _ReplaceableNumber(number, tokens) return _ReplaceableNumber(number, tokens)
def _extract_number_with_text_en_helper(tokens, short_scale=True, ordinals=False, fractional_numbers=True): def _extract_number_with_text_en_helper(tokens,
short_scale=True, ordinals=False,
fractional_numbers=True):
""" """
Helper for _extract_number_with_text_en. Helper for _extract_number_with_text_en.
@ -312,11 +326,13 @@ def _extract_number_with_text_en_helper(tokens, short_scale=True, ordinals=False
""" """
if fractional_numbers: if fractional_numbers:
fraction, fraction_text = _extract_fraction_with_text_en(tokens, short_scale, ordinals) fraction, fraction_text = \
_extract_fraction_with_text_en(tokens, short_scale, ordinals)
if fraction: if fraction:
return fraction, fraction_text return fraction, fraction_text
decimal, decimal_text = _extract_decimal_with_text_en(tokens, short_scale, ordinals) decimal, decimal_text = \
_extract_decimal_with_text_en(tokens, short_scale, ordinals)
if decimal: if decimal:
return decimal, decimal_text return decimal, decimal_text
@ -345,8 +361,12 @@ def _extract_fraction_with_text_en(tokens, short_scale, ordinals):
partitions = _partition_list(tokens, lambda t: t.word == c) partitions = _partition_list(tokens, lambda t: t.word == c)
if len(partitions) == 3: if len(partitions) == 3:
numbers1 = _extract_numbers_with_text(partitions[0], short_scale, ordinals, fractional_numbers=False) numbers1 = \
numbers2 = _extract_numbers_with_text(partitions[2], short_scale, ordinals, fractional_numbers=True) _extract_numbers_with_text(partitions[0], short_scale,
ordinals, fractional_numbers=False)
numbers2 = \
_extract_numbers_with_text(partitions[2], short_scale,
ordinals, fractional_numbers=True)
if not numbers1 or not numbers2: if not numbers1 or not numbers2:
return None, None return None, None
@ -389,14 +409,18 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals):
partitions = _partition_list(tokens, lambda t: t.word == c) partitions = _partition_list(tokens, lambda t: t.word == c)
if len(partitions) == 3: if len(partitions) == 3:
numbers1 = _extract_numbers_with_text(partitions[0], short_scale, ordinals, fractional_numbers=False) numbers1 = \
numbers2 = _extract_numbers_with_text(partitions[2], short_scale, ordinals, fractional_numbers=False) _extract_numbers_with_text(partitions[0], short_scale,
ordinals, fractional_numbers=False)
numbers2 = \
_extract_numbers_with_text(partitions[2], short_scale,
ordinals, fractional_numbers=False)
if not numbers1 or not numbers2: if not numbers1 or not numbers2:
return None, None return None, None
number = numbers1[-1]# type: _ReplaceableNumber number = numbers1[-1]
decimal = numbers2[0] # type: _ReplaceableNumber decimal = numbers2[0]
# TODO handle number dot number number number # TODO handle number dot number number number
if "." not in str(decimal.text): if "." not in str(decimal.text):
@ -599,7 +623,8 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
was found was found
""" """
return _extract_number_with_text_en(_tokenize(text), short_scale, ordinals).value return _extract_number_with_text_en(_tokenize(text),
short_scale, ordinals).value
def extract_duration_en(text): def extract_duration_en(text):
@ -1439,7 +1464,8 @@ def extract_numbers_en(text, short_scale=True, ordinals=False):
Returns: Returns:
list: list of extracted numbers as floats list: list of extracted numbers as floats
""" """
results = _extract_numbers_with_text(_tokenize(text), short_scale, ordinals) results = _extract_numbers_with_text(_tokenize(text),
short_scale, ordinals)
return [float(result.value) for result in results] return [float(result.value) for result in results]

View File

@ -141,7 +141,6 @@ class TestNormalize(unittest.TestCase):
self.assertEqual(extract_number("a couple hundred beers"), 200) self.assertEqual(extract_number("a couple hundred beers"), 200)
self.assertEqual(extract_number("a couple thousand beers"), 2000) self.assertEqual(extract_number("a couple thousand beers"), 2000)
def test_extract_duration_en(self): def test_extract_duration_en(self):
self.assertEqual(extract_duration("10 seconds"), (10.0, "")) self.assertEqual(extract_duration("10 seconds"), (10.0, ""))
self.assertEqual(extract_duration("5 minutes"), (300.0, "")) self.assertEqual(extract_duration("5 minutes"), (300.0, ""))
@ -150,12 +149,21 @@ class TestNormalize(unittest.TestCase):
self.assertEqual(extract_duration("25 weeks"), (15120000.0, "")) self.assertEqual(extract_duration("25 weeks"), (15120000.0, ""))
self.assertEqual(extract_duration("seven hours"), (25200.0, "")) self.assertEqual(extract_duration("seven hours"), (25200.0, ""))
self.assertEqual(extract_duration("7.5 seconds"), (7.5, "")) self.assertEqual(extract_duration("7.5 seconds"), (7.5, ""))
self.assertEqual(extract_duration("eight and a half days thirty nine seconds"), (734439.0, "")) self.assertEqual(extract_duration("eight and a half days thirty"
self.assertEqual(extract_duration("Set a timer for 30 minutes"), (1800.0, "set a timer for")) " nine seconds"), (734439.0, ""))
self.assertEqual(extract_duration("Four and a half minutes until sunset"), (270.0, "until sunset")) self.assertEqual(extract_duration("Set a timer for 30 minutes"),
self.assertEqual(extract_duration("Nineteen minutes past the hour"), (1140.0, "past the hour")) (1800.0, "set a timer for"))
self.assertEqual(extract_duration("wake me up in three weeks, four hundred ninety seven days, and three hundred 91.6 seconds"), (44755591.6, "wake me up in , , and")) self.assertEqual(extract_duration("Four and a half minutes until"
self.assertEqual(extract_duration("The movie is one hour, fifty seven and a half minutes long"), (7050.0, "the movie is , long")) " sunset"), (270.0, "until sunset"))
self.assertEqual(extract_duration("Nineteen minutes past the hour"),
(1140.0, "past the hour"))
self.assertEqual(extract_duration("wake me up in three weeks, four"
" hundred ninety seven days, and"
" three hundred 91.6 seconds"),
(44755591.6, "wake me up in , , and"))
self.assertEqual(extract_duration("The movie is one hour, fifty seven"
" and a half minutes long"),
(7050.0, "the movie is , long"))
def test_extractdatetime_en(self): def test_extractdatetime_en(self):
def extractWithFormat(text): def extractWithFormat(text):
@ -554,10 +562,10 @@ class TestNormalize(unittest.TestCase):
self.assertEqual(extract_numbers("two pigs and six trillion bacteria", self.assertEqual(extract_numbers("two pigs and six trillion bacteria",
short_scale=False), [2, 6e18]) short_scale=False), [2, 6e18])
self.assertEqual(extract_numbers("thirty second or first", self.assertEqual(extract_numbers("thirty second or first",
ordinals=True), [32, 1]) ordinals=True), [32, 1])
self.assertEqual(extract_numbers("this is a seven eight nine and a " self.assertEqual(extract_numbers("this is a seven eight nine and a"
"half test"), " half test"),
[7.0, 8.0, 9.5]) [7.0, 8.0, 9.5])
def test_contractions(self): def test_contractions(self):
self.assertEqual(normalize("ain't"), "is not") self.assertEqual(normalize("ain't"), "is not")