Fix decimal and fraction parsing.

This updates the _extract_fraction and _extract_decimal functions to handle the new token format.
2019-01-30 18:03:25 -05:00 · 2019-01-30 18:03:25 -05:00 · 71836b61ec
parent 48214ca66a
commit 71836b61ec
1 changed files with 57 additions and 33 deletions
--- a/mycroft/util/lang/parse_en.py
+++ b/mycroft/util/lang/parse_en.py
@ -114,6 +114,35 @@ _STRING_SHORT_ORDINAL_EN = _invert_dict(SHORT_ORDINAL_STRING_EN)
 _STRING_LONG_ORDINAL_EN = _invert_dict(LONG_ORDINAL_STRING_EN)
 def _partition_list(items, split_on):
    """
    Partition a list of items.
    Works similarly to str.partition
    Args:
        items:
        split_on callable:
            Should return a boolean. Each item will be passed to
            this callable in succession, and partitions will be
            created any time it returns True.
    Returns:
    """
    splits = []
    current_split = []
    for item in items:
        if split_on(item):
            splits.append(current_split)
            splits.append([item])
            current_split = []
        else:
            current_split.append(item)
    splits.append(current_split)
    return list(filter(lambda x: len(x) != 0, splits))
 def _extract_fraction(tokens):
    """
    Extract fraction numbers from a string.
@ -131,20 +160,21 @@ def _extract_fraction(tokens):
        tokens [_Token]: words and their indexes in the original string.
    Returns:
-        int or float
+        (int or float, [_Token])
-        None if no fraction value is found.
+        The value found, and the list of relevant tokens.
        (None, None) if no fraction value is found.
    """
-    if len(tokens) != 3 or tokens[1].word not in _FRACTION_MARKER:
+    for c in _FRACTION_MARKER:
-        return None, None
+        partitions = _partition_list(tokens, lambda t: t.word == c)
        if len(partitions) == 3:
            # ensure first is not a fraction and second is a fraction
-    num1, words1 = _extract_number_with_text_en(tokens[0])
+            num1, tokens1 = _extract_number_with_text_en(partitions[0])
-    num2, words2 = _extract_number_with_text_en(tokens[2])
+            num2, tokens2 = _extract_number_with_text_en(partitions[2])
            if num1 is not None and num2 is not None \
                    and num1 >= 1 and 0 < num2 < 1:
-        return num1 + num2, tokens
+                return num1 + num2, tokens1 + partitions[1] + tokens2
    else:
    return None, None
@ -164,31 +194,25 @@ def _extract_decimal(tokens):
        This does not currently handle things like:
            number dot number number number
    Args:
        text str: The text to parse.
    Returns:
        int or float
        None if no decimal value is found.
    Args:
        tokens [_Token]: The text to parse.
    Returns:
-        float
+        (float, [_Token])
-        None if no decimal value is found.
+        The value found and relevant tokens.
        (None, None) if no decimal value is found.
    """
-    if len(tokens) != 3 or tokens[1].word not in _DECIMAL_MARKER:
+    for c in _DECIMAL_MARKER:
-        return None, None
+        partitions = _partition_list(tokens, lambda t: t.word == c)
-
+        if len(partitions) == 3:
-    number, number_text = _extract_number_with_text_en(tokens[0])
+            number, tokens1 = _extract_number_with_text_en(partitions[0])
-    decimal, decimal_text = _extract_number_with_text_en(tokens[2])
+            decimal, tokens2 = _extract_number_with_text_en(partitions[2])
            if number is not None and decimal is not None:
                # TODO handle number dot number number number
                if "." not in str(decimal):
-            return number + float("0." + str(decimal)), tokens
+                    return number + float("0." + str(decimal)), \
-
+                           tokens1 + partitions[1] + tokens2
    return None, None