diff --git a/mycroft/util/format.py b/mycroft/util/format.py
index b23b888cdb..6c655ffda0 100755
--- a/mycroft/util/format.py
+++ b/mycroft/util/format.py
@@ -1,5 +1,3 @@
-# -*- coding: iso-8859-15 -*-
-#
 # Copyright 2017 Mycroft AI Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,52 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-FRACTION_STRING_EN = {
-    2: 'half',
-    3: 'third',
-    4: 'forth',
-    5: 'fifth',
-    6: 'sixth',
-    7: 'seventh',
-    8: 'eigth',
-    9: 'ninth',
-    10: 'tenth',
-    11: 'eleventh',
-    12: 'twelveth',
-    13: 'thirteenth',
-    14: 'fourteenth',
-    15: 'fifteenth',
-    16: 'sixteenth',
-    17: 'seventeenth',
-    18: 'eighteenth',
-    19: 'nineteenth',
-    20: 'twentyith'
-}
 
-FRACTION_STRING_PT = {
-    2: 'meio',
-    3: u'terço',
-    4: 'quarto',
-    5: 'quinto',
-    6: 'sexto',
-    7: u'sétimo',
-    8: 'oitavo',
-    9: 'nono',
-    10: u'décimo',
-    11: 'onze avos',
-    12: 'doze avos',
-    13: 'treze avos',
-    14: 'catorze avos',
-    15: 'quinze avos',
-    16: 'dezasseis avos',
-    17: 'dezassete avos',
-    18: 'dezoito avos',
-    19: 'dezanove avos',
-    20: u'vigésimo',
-    30: u'trigésimo',
-    100: u'centésimo',
-    1000: u'milésimo'
-}
+from mycroft.util.lang.format_en import *
+from mycroft.util.lang.format_es import *
+from mycroft.util.lang.format_pt import *
 
 
 def nice_number(number, lang="en-us", speech=True, denominators=None):
@@ -95,55 +51,6 @@ def nice_number(number, lang="en-us", speech=True, denominators=None):
     return str(number)
 
 
-def nice_number_en(result):
-    """ English conversion for nice_number """
-    whole, num, den = result
-    if num == 0:
-        return str(whole)
-    den_str = FRACTION_STRING_EN[den]
-    if whole == 0:
-        if num == 1:
-            return_string = 'a {}'.format(den_str)
-        else:
-            return_string = '{} {}'.format(num, den_str)
-    elif num == 1:
-        return_string = '{} and a {}'.format(whole, den_str)
-    else:
-        return_string = '{} and {} {}'.format(whole, num, den_str)
-    if num > 1:
-        return_string += 's'
-    return return_string
-
-
-def nice_number_pt(result):
-    """ Portuguese conversion for nice_number """
-    whole, num, den = result
-    if num == 0:
-        return str(whole)
-    # denominador
-    den_str = FRACTION_STRING_PT[den]
-    # fracções
-    if whole == 0:
-        if num == 1:
-            # um décimo
-            return_string = 'um {}'.format(den_str)
-        else:
-            # três meio
-            return_string = '{} {}'.format(num, den_str)
-    # inteiros >10
-    elif num == 1:
-        # trinta e um
-        return_string = '{} e {}'.format(whole, den_str)
-    # inteiros >10 com fracções
-    else:
-        # vinte e 3 décimo
-        return_string = '{} e {} {}'.format(whole, num, den_str)
-    # plural
-    if num > 1:
-        return_string += 's'
-    return return_string
-
-
 def convert_number(number, denominators):
     """ Convert floats to mixed fractions """
     int_number = int(number)
diff --git a/mycroft/util/lang/__init__.py b/mycroft/util/lang/__init__.py
new file mode 100644
index 0000000000..b5d9e70242
--- /dev/null
+++ b/mycroft/util/lang/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2017 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/mycroft/util/lang/format_en.py b/mycroft/util/lang/format_en.py
new file mode 100644
index 0000000000..a99343af6f
--- /dev/null
+++ b/mycroft/util/lang/format_en.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2017 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+FRACTION_STRING_EN = {
+    2: 'half',
+    3: 'third',
+    4: 'fourth',
+    5: 'fifth',
+    6: 'sixth',
+    7: 'seventh',
+    8: 'eighth',
+    9: 'ninth',
+    10: 'tenth',
+    11: 'eleventh',
+    12: 'twelfth',
+    13: 'thirteenth',
+    14: 'fourteenth',
+    15: 'fifteenth',
+    16: 'sixteenth',
+    17: 'seventeenth',
+    18: 'eighteenth',
+    19: 'nineteenth',
+    20: 'twentieth'
+}
+
+
+def nice_number_en(result):
+    """ English conversion for nice_number
+
+    Args:
+        result (tuple): (whole, numerator, denominator) mixed fraction
+    Returns:
+        (str): spoken form such as '1 and a third' or '3 halves'
+    """
+    whole, num, den = result
+    if num == 0:
+        return str(whole)
+    den_str = FRACTION_STRING_EN[den]
+    if num > 1:
+        # 'half' is the only irregular plural in the table
+        den_str = 'halves' if den == 2 else den_str + 's'
+    count = 'a' if num == 1 else str(num)
+    fraction = '{} {}'.format(count, den_str)
+    return fraction if whole == 0 else '{} and {}'.format(whole, fraction)
diff --git a/mycroft/util/lang/format_pt.py b/mycroft/util/lang/format_pt.py
new file mode 100644
index 0000000000..296ded36a9
--- /dev/null
+++ b/mycroft/util/lang/format_pt.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2017 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+FRACTION_STRING_PT = {
+    2: 'meio',
+    3: u'terço',
+    4: 'quarto',
+    5: 'quinto',
+    6: 'sexto',
+    7: u'sétimo',
+    8: 'oitavo',
+    9: 'nono',
+    10: u'décimo',
+    11: 'onze avos',
+    12: 'doze avos',
+    13: 'treze avos',
+    14: 'catorze avos',
+    15: 'quinze avos',
+    16: 'dezasseis avos',
+    17: 'dezassete avos',
+    18: 'dezoito avos',
+    19: 'dezanove avos',
+    20: u'vigésimo',
+    30: u'trigésimo',
+    100: u'centésimo',
+    1000: u'milésimo'
+}
+
+
+def nice_number_pt(result):
+    """ Portuguese conversion for nice_number
+
+    Args:
+        result (tuple): (whole, numerator, denominator) mixed fraction
+    Returns:
+        (str): spoken form such as 'um meio' or '2 e 3 quartos'
+    """
+    whole, num, den = result
+    if num == 0:
+        return str(whole)
+    den_str = FRACTION_STRING_PT[den]
+    if whole == 0:
+        if num == 1:
+            return_string = 'um {}'.format(den_str)
+        else:
+            return_string = '{} {}'.format(num, den_str)
+    elif num == 1:
+        # whole number plus a unit fraction, e.g. "2 e meio"
+        return_string = '{} e {}'.format(whole, den_str)
+    else:
+        # whole number plus several parts, e.g. "20 e 3 quartos"
+        return_string = '{} e {} {}'.format(whole, num, den_str)
+    # pluralise, except the "... avos" names which already end in "s"
+    if num > 1 and not den_str.endswith('s'):
+        return_string += 's'
+    return return_string
diff --git a/mycroft/util/lang/parse_common.py b/mycroft/util/lang/parse_common.py
new file mode 100644
index 0000000000..22b1176ba2
--- /dev/null
+++ b/mycroft/util/lang/parse_common.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2017 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+def is_numeric(input_str):
+    """
+    Takes in a string and tests to see if it is a number.
+    Args:
+        input_str (str): string to test if a number
+    Returns:
+        (bool): True if float() accepts the value, else False
+
+    """
+    try:
+        float(input_str)
+    except (TypeError, ValueError):
+        # TypeError covers non-string input such as None
+        return False
+    return True
+
+
+def look_for_fractions(split_list):
+    """
+    Decide whether the pieces of a word split on '/' form a fraction.
+
+    Args:
+        split_list (list): list created by splitting on '/'
+    Returns:
+        (bool): True when there are exactly two pieces and both are
+                numeric, otherwise False
+
+    """
+    if len(split_list) != 2:
+        return False
+
+    top, bottom = split_list[0], split_list[1]
+    return is_numeric(top) and is_numeric(bottom)
diff --git a/mycroft/util/lang/parse_en.py b/mycroft/util/lang/parse_en.py
new file mode 100644
index 0000000000..fef772f9e7
--- /dev/null
+++ b/mycroft/util/lang/parse_en.py
@@ -0,0 +1,835 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2017 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from datetime import datetime, timedelta
+from dateutil.relativedelta import relativedelta
+from mycroft.util.lang.parse_common import *
+
+
+def extractnumber_en(text):
+    """
+    Extract a number from text: float()-style numerals, "N/M"
+    fractions, "first", "second" and the words "one" to "ten".
+    Args:
+        text (str): the string to search for a number
+    Returns:
+        (int) or (float): The value of extracted number, or False
+                          if nothing non-zero was recognised
+    """
+    aWords = text.split()
+    aWords = [word for word in aWords if word not in ["the", "a", "an"]]
+    andPass = False
+    valPreAnd = False
+    val = False
+    count = 0
+    while count < len(aWords):
+        word = aWords[count]
+        if is_numeric(word):
+            # float() rather than isdigit() so decimals are accepted
+            val = float(word)
+        elif word == "first":
+            val = 1
+        elif word == "second":
+            val = 2
+        elif isFractional_en(word):
+            val = isFractional_en(word)
+        else:
+            if word == "one":
+                val = 1
+            elif word == "two":
+                val = 2
+            elif word == "three":
+                val = 3
+            elif word == "four":
+                val = 4
+            elif word == "five":
+                val = 5
+            elif word == "six":
+                val = 6
+            elif word == "seven":
+                val = 7
+            elif word == "eight":
+                val = 8
+            elif word == "nine":
+                val = 9
+            elif word == "ten":
+                val = 10
+            if val:
+                if count < (len(aWords) - 1):
+                    wordNext = aWords[count + 1]
+                else:
+                    wordNext = ""
+                valNext = isFractional_en(wordNext)
+
+                if valNext:
+                    val = val * valNext
+                    aWords[count + 1] = ""
+
+        # nothing recognised yet (a value of 0 also lands here)
+        if not val:
+            # look for fractions like "2/3"
+            aPieces = word.split('/')
+            # look_for_fractions() requires exactly two numeric pieces
+            # around the '/'
+            if look_for_fractions(aPieces):
+                val = float(aPieces[0]) / float(aPieces[1])
+            elif andPass:
+                # nothing usable after "and": keep the value before it
+                val = valPreAnd
+                break
+            else:
+                count += 1
+                continue
+
+        aWords[count] = ""
+
+        if (andPass):
+            aWords[count - 1] = ''  # remove "and"
+            val += valPreAnd
+        elif count + 1 < len(aWords) and aWords[count + 1] == 'and':
+            andPass = True
+            valPreAnd = val
+            val = False
+            count += 2
+            continue
+        elif count + 2 < len(aWords) and aWords[count + 2] == 'and':
+            andPass = True
+            valPreAnd = val
+            val = False
+            count += 3
+            continue
+
+        break
+
+    # no number found (or the number found was 0)
+    if not val:
+        return False
+
+    # Drop the consumed (blanked) words and rejoin the rest; the
+    # rebuilt text is not returned or used after this point
+    aWords = [word for word in aWords if len(word) > 0]
+    text = ' '.join(aWords)
+
+    return val
+
+
+def extract_datetime_en(str, currentDate=None):
+    def clean_string(str):
+        # cleans the input string of unneeded punctuation and capitalization
+        # among other things
+        str = str.lower().replace('?', '').replace('.', '').replace(',', '') \
+            .replace(' the ', ' ').replace(' a ', ' ').replace(' an ', ' ')
+        wordList = str.split()
+        for idx, word in enumerate(wordList):
+            word = word.replace("'s", "")
+
+            ordinals = ["rd", "st", "nd", "th"]
+            if word[0].isdigit():
+                for ord in ordinals:
+                    if ord in word:
+                        word = word.replace(ord, "")
+            wordList[idx] = word
+
+        return wordList
+
+    def date_found():
+        return found or \
+            (
+                datestr != "" or timeStr != "" or
+                yearOffset != 0 or monthOffset != 0 or
+                dayOffset is True or hrOffset != 0 or
+                hrAbs != 0 or minOffset != 0 or
+                minAbs != 0 or secOffset != 0
+            )
+
+    if str == "":
+        return None
+    if currentDate is None:
+        currentDate = datetime.now()
+
+    found = False
+    daySpecified = False
+    dayOffset = False
+    monthOffset = 0
+    yearOffset = 0
+    dateNow = currentDate
+    today = dateNow.strftime("%w")
+    currentYear = dateNow.strftime("%Y")
+    fromFlag = False
+    datestr = ""
+    hasYear = False
+    timeQualifier = ""
+
+    timeQualifiersList = ['morning', 'afternoon', 'evening']
+    markers = ['at', 'in', 'on', 'by', 'this', 'around', 'for', 'of']
+    days = ['monday', 'tuesday', 'wednesday',
+            'thursday', 'friday', 'saturday', 'sunday']
+    months = ['january', 'february', 'march', 'april', 'may', 'june',
+              'july', 'august', 'september', 'october', 'november',
+              'december']
+    monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july', 'aug',
+                   'sept', 'oct', 'nov', 'dec']
+
+    words = clean_string(str)
+
+    for idx, word in enumerate(words):
+        if word == "":
+            continue
+        wordPrevPrev = words[idx - 2] if idx > 1 else ""
+        wordPrev = words[idx - 1] if idx > 0 else ""
+        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
+        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
+
+        # this isn't in clean string because I don't want to save back to words
+        word = word.rstrip('s')
+        start = idx
+        used = 0
+        # save timequalifier for later
+        if word in timeQualifiersList:
+            timeQualifier = word
+            # parse today, tomorrow, day after tomorrow
+        elif word == "today" and not fromFlag:
+            dayOffset = 0
+            used += 1
+        elif word == "tomorrow" and not fromFlag:
+            dayOffset = 1
+            used += 1
+        elif (word == "day" and
+                wordNext == "after" and
+                wordNextNext == "tomorrow" and
+                not fromFlag and
+                not wordPrev[0].isdigit()):
+            dayOffset = 2
+            used = 3
+            if wordPrev == "the":
+                start -= 1
+                used += 1
+                # parse 5 days, 10 weeks, last week, next week
+        elif word == "day":
+            if wordPrev[0].isdigit():
+                dayOffset += int(wordPrev)
+                start -= 1
+                used = 2
+        elif word == "week" and not fromFlag:
+            if wordPrev[0].isdigit():
+                dayOffset += int(wordPrev) * 7
+                start -= 1
+                used = 2
+            elif wordPrev == "next":
+                dayOffset = 7
+                start -= 1
+                used = 2
+            elif wordPrev == "last":
+                dayOffset = -7
+                start -= 1
+                used = 2
+                # parse 10 months, next month, last month
+        elif word == "month" and not fromFlag:
+            if wordPrev[0].isdigit():
+                monthOffset = int(wordPrev)
+                start -= 1
+                used = 2
+            elif wordPrev == "next":
+                monthOffset = 1
+                start -= 1
+                used = 2
+            elif wordPrev == "last":
+                monthOffset = -1
+                start -= 1
+                used = 2
+                # parse 5 years, next year, last year
+        elif word == "year" and not fromFlag:
+            if wordPrev[0].isdigit():
+                yearOffset = int(wordPrev)
+                start -= 1
+                used = 2
+            elif wordPrev == "next":
+                yearOffset = 1
+                start -= 1
+                used = 2
+            elif wordPrev == "last":
+                yearOffset = -1
+                start -= 1
+                used = 2
+                # parse Monday, Tuesday, etc., and next Monday,
+                # last Tuesday, etc.
+        elif word in days and not fromFlag:
+            d = days.index(word)
+            dayOffset = (d + 1) - int(today)
+            used = 1
+            if dayOffset < 0:
+                dayOffset += 7
+            if wordPrev == "next":
+                dayOffset += 7
+                used += 1
+                start -= 1
+            elif wordPrev == "last":
+                dayOffset -= 7
+                used += 1
+                start -= 1
+                # parse 15 of July, June 20th, Feb 18, 19 of February
+        elif word in months or word in monthsShort and not fromFlag:
+            try:
+                m = months.index(word)
+            except ValueError:
+                m = monthsShort.index(word)
+            used += 1
+            datestr = months[m]
+            if wordPrev and (wordPrev[0].isdigit() or
+                             (wordPrev == "of" and wordPrevPrev[0].isdigit())):
+                if wordPrev == "of" and wordPrevPrev[0].isdigit():
+                    datestr += " " + words[idx - 2]
+                    used += 1
+                    start -= 1
+                else:
+                    datestr += " " + wordPrev
+                start -= 1
+                used += 1
+                if wordNext and wordNext[0].isdigit():
+                    datestr += " " + wordNext
+                    used += 1
+                    hasYear = True
+                else:
+                    hasYear = False
+
+            elif wordNext and wordNext[0].isdigit():
+                datestr += " " + wordNext
+                used += 1
+                if wordNextNext and wordNextNext[0].isdigit():
+                    datestr += " " + wordNextNext
+                    used += 1
+                    hasYear = True
+                else:
+                    hasYear = False
+        # parse 5 days from tomorrow, 10 weeks from next thursday,
+        # 2 months from July
+        validFollowups = days + months + monthsShort
+        validFollowups.append("today")
+        validFollowups.append("tomorrow")
+        validFollowups.append("next")
+        validFollowups.append("last")
+        validFollowups.append("now")
+        if (word == "from" or word == "after") and wordNext in validFollowups:
+            used = 2
+            fromFlag = True
+            if wordNext == "tomorrow":
+                dayOffset += 1
+            elif wordNext in days:
+                d = days.index(wordNext)
+                tmpOffset = (d + 1) - int(today)
+                used = 2
+                if tmpOffset < 0:
+                    tmpOffset += 7
+                dayOffset += tmpOffset
+            elif wordNextNext and wordNextNext in days:
+                d = days.index(wordNextNext)
+                tmpOffset = (d + 1) - int(today)
+                used = 3
+                if wordNext == "next":
+                    tmpOffset += 7
+                    used += 1
+                    start -= 1
+                elif wordNext == "last":
+                    tmpOffset -= 7
+                    used += 1
+                    start -= 1
+                dayOffset += tmpOffset
+        if used > 0:
+            if start - 1 > 0 and words[start - 1] == "this":
+                start -= 1
+                used += 1
+
+            for i in range(0, used):
+                words[i + start] = ""
+
+            if (start - 1 >= 0 and words[start - 1] in markers):
+                words[start - 1] = ""
+            found = True
+            daySpecified = True
+
+    # parse time
+    timeStr = ""
+    hrOffset = 0
+    minOffset = 0
+    secOffset = 0
+    hrAbs = 0
+    minAbs = 0
+    military = False
+
+    for idx, word in enumerate(words):
+        if word == "":
+            continue
+
+        wordPrevPrev = words[idx - 2] if idx > 1 else ""
+        wordPrev = words[idx - 1] if idx > 0 else ""
+        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
+        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
+        # parse noon, midnight, morning, afternoon, evening
+        used = 0
+        if word == "noon":
+            hrAbs = 12
+            used += 1
+        elif word == "midnight":
+            hrAbs = 0
+            used += 1
+        elif word == "morning":
+            if hrAbs == 0:
+                hrAbs = 8
+            used += 1
+        elif word == "afternoon":
+            if hrAbs == 0:
+                hrAbs = 15
+            used += 1
+        elif word == "evening":
+            if hrAbs == 0:
+                hrAbs = 19
+            used += 1
+            # parse half an hour, quarter hour
+        elif word == "hour" and \
+                (wordPrev in markers or wordPrevPrev in markers):
+            if wordPrev == "half":
+                minOffset = 30
+            elif wordPrev == "quarter":
+                minOffset = 15
+            elif wordPrevPrev == "quarter":
+                minOffset = 15
+                if idx > 2 and words[idx - 3] in markers:
+                    words[idx - 3] = ""
+                words[idx - 2] = ""
+            else:
+                hrOffset = 1
+            if wordPrevPrev in markers:
+                words[idx - 2] = ""
+            words[idx - 1] = ""
+            used += 1
+            hrAbs = -1
+            minAbs = -1
+            # parse 5:00 am, 12:00 p.m., etc
+        elif word[0].isdigit():
+            isTime = True
+            strHH = ""
+            strMM = ""
+            remainder = ""
+            if ':' in word:
+                # parse colons
+                # "3:00 in the morning"
+                stage = 0
+                length = len(word)
+                for i in range(length):
+                    if stage == 0:
+                        if word[i].isdigit():
+                            strHH += word[i]
+                        elif word[i] == ":":
+                            stage = 1
+                        else:
+                            stage = 2
+                            i -= 1
+                    elif stage == 1:
+                        if word[i].isdigit():
+                            strMM += word[i]
+                        else:
+                            stage = 2
+                            i -= 1
+                    elif stage == 2:
+                        remainder = word[i:].replace(".", "")
+                        break
+                if remainder == "":
+                    nextWord = wordNext.replace(".", "")
+                    if nextWord == "am" or nextWord == "pm":
+                        remainder = nextWord
+                        used += 1
+                    elif nextWord == "tonight":
+                        remainder = "pm"
+                        used += 1
+                    elif wordNext == "in" and wordNextNext == "the" and \
+                            words[idx + 3] == "morning":
+                        reaminder = "am"
+                        used += 3
+                    elif wordNext == "in" and wordNextNext == "the" and \
+                            words[idx + 3] == "afternoon":
+                        remainder = "pm"
+                        used += 3
+                    elif wordNext == "in" and wordNextNext == "the" and \
+                            words[idx + 3] == "evening":
+                        remainder = "pm"
+                        used += 3
+                    elif wordNext == "in" and wordNextNext == "morning":
+                        remainder = "am"
+                        used += 2
+                    elif wordNext == "in" and wordNextNext == "afternoon":
+                        remainder = "pm"
+                        used += 2
+                    elif wordNext == "in" and wordNextNext == "evening":
+                        remainder = "pm"
+                        used += 2
+                    elif wordNext == "this" and wordNextNext == "morning":
+                        remainder = "am"
+                        used = 2
+                    elif wordNext == "this" and wordNextNext == "afternoon":
+                        remainder = "pm"
+                        used = 2
+                    elif wordNext == "this" and wordNextNext == "evening":
+                        remainder = "pm"
+                        used = 2
+                    elif wordNext == "at" and wordNextNext == "night":
+                        if strHH > 5:
+                            remainder = "pm"
+                        else:
+                            remainder = "am"
+                        used += 2
+                    else:
+                        if timeQualifier != "":
+                            military = True
+                            if strHH <= 12 and \
+                                    (timeQualifier == "evening" or
+                                     timeQualifier == "afternoon"):
+                                strHH += 12
+            else:
+                # try to parse # s without colons
+                # 5 hours, 10 minutes etc.
+                length = len(word)
+                strNum = ""
+                remainder = ""
+                for i in range(length):
+                    if word[i].isdigit():
+                        strNum += word[i]
+                    else:
+                        remainder += word[i]
+
+                if remainder == "":
+                    remainder = wordNext.replace(".", "").lstrip().rstrip()
+
+                if (
+                        remainder == "pm" or
+                        wordNext == "pm" or
+                        remainder == "p.m." or
+                        wordNext == "p.m."):
+                    strHH = strNum
+                    remainder = "pm"
+                    used = 1
+                elif (
+                        remainder == "am" or
+                        wordNext == "am" or
+                        remainder == "a.m." or
+                        wordNext == "a.m."):
+                    strHH = strNum
+                    remainder = "am"
+                    used = 1
+                else:
+                    if wordNext == "pm" or wordNext == "p.m.":
+                        strHH = strNum
+                        reaminder = "pm"
+                        used = 1
+                    elif wordNext == "am" or wordNext == "a.m.":
+                        strHH = strNum
+                        remainder = "am"
+                        used = 1
+                    elif (
+                            int(word) > 100 and
+                            (
+                                wordPrev == "o" or
+                                wordPrev == "oh"
+                            )):
+                        # 0800 hours (pronounced oh-eight-hundred)
+                        strHH = int(word) / 100
+                        strMM = int(word) - strHH * 100
+                        military = True
+                        if wordNext == "hours":
+                            used += 1
+                    elif (
+                            wordNext == "hours" and
+                            word[0] != '0' and
+                            (
+                                int(word) < 100 and
+                                int(word) > 2400
+                            )):
+                        # ignores military time
+                        # "in 3 hours"
+                        hrOffset = int(word)
+                        used = 2
+                        isTime = False
+                        hrAbs = -1
+                        minAbs = -1
+
+                    elif wordNext == "minutes":
+                        # "in 10 minutes"
+                        minOffset = int(word)
+                        used = 2
+                        isTime = False
+                        hrAbs = -1
+                        minAbs = -1
+                    elif wordNext == "seconds":
+                        # in 5 seconds
+                        secOffset = int(word)
+                        used = 2
+                        isTime = False
+                        hrAbs = -1
+                        minAbs = -1
+                    elif int(word) > 100:
+                        strHH = int(word) / 100
+                        strMM = int(word) - strHH * 100
+                        military = True
+                        if wordNext == "hours":
+                            used += 1
+                    elif wordNext[0].isdigit():
+                        strHH = word
+                        strMM = wordNext
+                        military = True
+                        used += 1
+                        if wordNextNext == "hours":
+                            used += 1
+                    elif (
+                            wordNext == "" or wordNext == "o'clock" or
+                            (
+                                        wordNext == "in" and
+                                        (
+                                            wordNextNext == "the" or
+                                            wordNextNext == timeQualifier
+                                        )
+                            )):
+                        strHH = word
+                        strMM = 00
+                        if wordNext == "o'clock":
+                            used += 1
+                        if wordNext == "in" or wordNextNext == "in":
+                            used += (1 if wordNext == "in" else 2)
+                            if (wordNextNext and
+                                wordNextNext in timeQualifier or
+                                (words[words.index(wordNextNext) + 1] and
+                                 words[words.index(wordNextNext) + 1] in
+                                 timeQualifier)):
+                                if (wordNextNext == "afternoon" or
+                                    (len(words) >
+                                     words.index(wordNextNext) + 1 and
+                                     words[words.index(
+                                         wordNextNext) + 1] == "afternoon")):
+                                    remainder = "pm"
+                                if (wordNextNext == "evening" or
+                                    (len(words) >
+                                     (words.index(wordNextNext) + 1) and
+                                     words[words.index(
+                                         wordNextNext) + 1] == "evening")):
+                                    remainder = "pm"
+                                if (wordNextNext == "morning" or
+                                    (len(words) >
+                                     words.index(wordNextNext) + 1 and
+                                     words[words.index(
+                                         wordNextNext) + 1] == "morning")):
+                                    remainder = "am"
+                        if timeQualifier != "":
+                            military = True
+                    else:
+                        isTime = False
+
+            strHH = int(strHH) if strHH else 0
+            strMM = int(strMM) if strMM else 0
+            strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
+            strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
+            if strHH > 24 or strMM > 59:
+                isTime = False
+                used = 0
+            if isTime:
+                hrAbs = strHH * 1
+                minAbs = strMM * 1
+                used += 1
+        if used > 0:
+            # removed parsed words from the sentence
+            for i in range(used):
+                words[idx + i] = ""
+
+            if wordPrev == "o" or wordPrev == "oh":
+                words[words.index(wordPrev)] = ""
+
+            if wordPrev == "early":
+                hrOffset = -1
+                words[idx - 1] = ""
+                idx -= 1
+            elif wordPrev == "late":
+                hrOffset = 1
+                words[idx - 1] = ""
+                idx -= 1
+            if idx > 0 and wordPrev in markers:
+                words[idx - 1] = ""
+            if idx > 1 and wordPrevPrev in markers:
+                words[idx - 2] = ""
+
+            idx += used - 1
+            found = True
+
+    # check that we found a date
+    if not date_found:
+        return None
+
+    if dayOffset is False:
+        dayOffset = 0
+
+    # perform date manipulation
+
+    extractedDate = dateNow
+    extractedDate = extractedDate.replace(microsecond=0,
+                                          second=0,
+                                          minute=0,
+                                          hour=0)
+    if datestr != "":
+        temp = datetime.strptime(datestr, "%B %d")
+        if not hasYear:
+            temp = temp.replace(year=extractedDate.year)
+            if extractedDate < temp:
+                extractedDate = extractedDate.replace(year=int(currentYear),
+                                                      month=int(
+                                                          temp.strftime(
+                                                              "%m")),
+                                                      day=int(temp.strftime(
+                                                          "%d")))
+            else:
+                extractedDate = extractedDate.replace(
+                    year=int(currentYear) + 1,
+                    month=int(temp.strftime("%m")),
+                    day=int(temp.strftime("%d")))
+        else:
+            extractedDate = extractedDate.replace(
+                year=int(temp.strftime("%Y")),
+                month=int(temp.strftime("%m")),
+                day=int(temp.strftime("%d")))
+
+    if timeStr != "":
+        temp = datetime(timeStr)
+        extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
+                                              minute=temp.strftime("%M"),
+                                              second=temp.strftime("%S"))
+
+    if yearOffset != 0:
+        extractedDate = extractedDate + relativedelta(years=yearOffset)
+    if monthOffset != 0:
+        extractedDate = extractedDate + relativedelta(months=monthOffset)
+    if dayOffset != 0:
+        extractedDate = extractedDate + relativedelta(days=dayOffset)
+    if hrAbs != -1 and minAbs != -1:
+
+        extractedDate = extractedDate + relativedelta(hours=hrAbs,
+                                                      minutes=minAbs)
+        if (hrAbs != 0 or minAbs != 0) and datestr == "":
+            if not daySpecified and dateNow > extractedDate:
+                extractedDate = extractedDate + relativedelta(days=1)
+    if hrOffset != 0:
+        extractedDate = extractedDate + relativedelta(hours=hrOffset)
+    if minOffset != 0:
+        extractedDate = extractedDate + relativedelta(minutes=minOffset)
+    if secOffset != 0:
+        extractedDate = extractedDate + relativedelta(seconds=secOffset)
+    for idx, word in enumerate(words):
+        if words[idx] == "and" and words[idx - 1] == "" and words[
+                idx + 1] == "":
+            words[idx] = ""
+
+    resultStr = " ".join(words)
+    resultStr = ' '.join(resultStr.split())
+    return [extractedDate, resultStr]
+
+
+def isFractional_en(input_str):
+    """
+    This function takes the given text and checks if it is a fraction.
+
+    Args:
+        input_str (str): the string to check if fractional
+    Returns:
+        (bool) or (float): False if not a fraction, otherwise the fraction
+
+    """
+    input_str = input_str.lower()  # match case-insensitively throughout
+    if input_str.endswith('s'):
+        input_str = input_str[:-1]  # e.g. "fifths"
+    aFrac = ["whole", "half", "third", "fourth", "fifth", "sixth",
+             "seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth"]
+
+    if input_str in aFrac:
+        return 1.0 / (aFrac.index(input_str) + 1)
+    if input_str == "quarter":
+        return 1.0 / 4
+
+    return False
+
+
+def normalize_en(text, remove_articles):
+    """ English string normalization
+
+    Args:
+        text (str): the string to normalize
+        remove_articles (bool): whether to remove "the", "a" and "an"
+    Returns:
+        (str): text with contractions expanded and number words as digits
+    """
+    # Expand common contractions, e.g. "isn't" -> "is not"
+    contraction = ["ain't", "aren't", "can't", "could've", "couldn't",
+                   "didn't", "doesn't", "don't", "gonna", "gotta",
+                   "hadn't", "hasn't", "haven't", "he'd", "he'll", "he's",
+                   "how'd", "how'll", "how's", "I'd", "I'll", "I'm",
+                   "I've", "isn't", "it'd", "it'll", "it's", "mightn't",
+                   "might've", "mustn't", "must've", "needn't",
+                   "oughtn't",
+                   "shan't", "she'd", "she'll", "she's", "shouldn't",
+                   "should've", "somebody's", "someone'd", "someone'll",
+                   "someone's", "that'll", "that's", "that'd", "there'd",
+                   "there're", "there's", "they'd", "they'll", "they're",
+                   "they've", "wasn't", "we'd", "we'll", "we're", "we've",
+                   "weren't", "what'd", "what'll", "what're", "what's",
+                   "whats",  # technically incorrect but some STT outputs
+                   "what've", "when's", "when'd", "where'd", "where's",
+                   "where've", "who'd", "who'd've", "who'll", "who're",
+                   "who's", "who've", "why'd", "why're", "why's", "won't",
+                   "won't've", "would've", "wouldn't", "wouldn't've",
+                   "y'all", "ya'll", "you'd", "you'd've", "you'll",
+                   "y'aint", "y'ain't", "you're", "you've"]
+    expansion = ["is not", "are not", "can not", "could have",
+                 "could not", "did not", "does not", "do not",
+                 "going to", "got to", "had not", "has not",
+                 "have not", "he would", "he will", "he is",
+                 "how did",
+                 "how will", "how is", "I would", "I will", "I am",
+                 "I have", "is not", "it would", "it will", "it is",
+                 "might not", "might have", "must not", "must have",
+                 "need not", "ought not", "shall not", "she would",
+                 "she will", "she is", "should not", "should have",
+                 "somebody is", "someone would", "someone will",
+                 "someone is", "that will", "that is", "that would",
+                 "there would", "there are", "there is", "they would",
+                 "they will", "they are", "they have", "was not",
+                 "we would", "we will", "we are", "we have",
+                 "were not", "what did", "what will", "what are",
+                 "what is",
+                 "what is", "what have", "when is", "when did",
+                 "where did", "where is", "where have", "who would",
+                 "who would have", "who will", "who are", "who is",
+                 "who have", "why did", "why are", "why is",
+                 "will not", "will not have", "would have",
+                 "would not", "would not have", "you all", "you all",
+                 "you would", "you would have", "you will",
+                 "you are not", "you are not", "you are", "you have"]
+    expansions = dict(zip(contraction, expansion))  # built once per call
+    # Convert numbers into digits, e.g. "two" -> "2"
+    textNumbers = ["zero", "one", "two", "three", "four", "five", "six",
+                   "seven", "eight", "nine", "ten", "eleven", "twelve",
+                   "thirteen", "fourteen", "fifteen", "sixteen",
+                   "seventeen", "eighteen", "nineteen", "twenty"]
+    digits = {name: str(value) for value, name in enumerate(textNumbers)}
+    kept = []
+    for word in text.split():  # split() also removes extra spaces
+        if remove_articles and word in ("the", "a", "an"):
+            continue
+        word = expansions.get(word, word)
+        kept.append(digits.get(word, word))
+    return " ".join(kept)
diff --git a/mycroft/util/lang/parse_es.py b/mycroft/util/lang/parse_es.py
new file mode 100644
index 0000000000..221af2c8f5
--- /dev/null
+++ b/mycroft/util/lang/parse_es.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2017 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from datetime import datetime, timedelta
+from dateutil.relativedelta import relativedelta
+from mycroft.util.lang.parse_common import *
+"""
+    Parse functions for Spanish (es)
+    TODO: numbers greater than 999999
+"""
+
+# Indefinite articles ["un", "una", "unos", "unas"] can not be suppressed,
+# in Spanish, "un caballo" means "a horse" or "one horse".
+es_articles = ["el", "la", "los", "las"]
+
+es_numbers_xlat = {
+    # accented and unaccented spellings are listed side by side
+    "un": 1,
+    "uno": 1,
+    "una": 1,
+    "dos": 2,
+    "tres": 3,
+    u"trés": 3,
+    "cuatro": 4,
+    "cinco": 5,
+    "seis": 6,
+    "siete": 7,
+    "ocho": 8,
+    "nueve": 9,
+    "diez": 10,
+    "once": 11,
+    "doce": 12,
+    "trece": 13,
+    "catorce": 14,
+    "quince": 15,
+    "dieciseis": 16,
+    u"dieciséis": 16,
+    "diecisiete": 17,
+    "dieciocho": 18,
+    "diecinueve": 19,
+    "veinte": 20,
+    "veintiuno": 21,
+    u"veintidós": 22,
+    "veintidos": 22,
+    "veintitres": 23,
+    u"veintitrés": 23,
+    "veinticuatro": 24,
+    "veinticinco": 25,
+    u"veintiséis": 26,
+    "veintiseis": 26,
+    "veintisiete": 27,
+    "veintiocho": 28,
+    "veintinueve": 29,
+    "treinta": 30,
+    "cuarenta": 40,
+    "cincuenta": 50,
+    "sesenta": 60,
+    "setenta": 70,
+    "ochenta": 80,
+    "noventa": 90,
+    "cien": 100,
+    "ciento": 100,
+    "doscientos": 200,
+    "doscientas": 200,
+    "trescientos": 300,
+    "trescientas": 300,
+    "cuatrocientos": 400,
+    "cuatrocientas": 400,
+    "quinientos": 500,
+    "quinientas": 500,
+    "seiscientos": 600,
+    "seiscientas": 600,
+    "setecientos": 700,
+    "setecientas": 700,
+    "ochocientos": 800,
+    "ochocientas": 800,
+    "novecientos": 900,
+    "novecientas": 900}
+
+
+def es_parse(words, i):
+    """Parse a Spanish number (0-999999) starting at words[i].
+
+    Args:
+        words (list): tokens of the sentence being normalized
+        i (int): index of the first token to examine
+    Returns:
+        (int, int) or None: the value and the index following the last
+        consumed token, or None when words[i] does not start a number
+    """
+    def es_cte(i, s):
+        # match the literal token s at position i
+        if i < len(words) and s == words[i]:
+            return s, i + 1
+        return None
+
+    def es_number_word(i, mi, ma):
+        # match a single number word whose value lies within [mi, ma]
+        if i < len(words):
+            v = es_numbers_xlat.get(words[i])
+            if v is not None and mi <= v <= ma:
+                return v, i + 1
+        return None
+
+    def es_number_1_99(i):
+        r1 = es_number_word(i, 1, 29)
+        if r1:
+            return r1
+
+        # tens, optionally followed by "y" <units>, e.g. "treinta y dos"
+        r1 = es_number_word(i, 30, 90)
+        if r1:
+            v1, i1 = r1
+            r2 = es_cte(i1, "y")
+            if r2:
+                r3 = es_number_word(r2[1], 1, 9)
+                if r3:
+                    return v1 + r3[0], r3[1]
+            return r1
+        return None
+
+    def es_number_1_999(i):
+        # hundreds [1-99]?
+        r1 = es_number_word(i, 100, 900)
+        if r1:
+            v1, i1 = r1
+            r2 = es_number_1_99(i1)
+            if r2:
+                return v1 + r2[0], r2[1]
+            return r1
+
+        # [1-99]
+        return es_number_1_99(i)
+
+    def es_number(i):
+        # check for cero (matched literally, es_numbers_xlat holds no zero)
+        if es_cte(i, "cero"):
+            return 0, i + 1
+
+        # check for [1-999] (mil [0-999])?
+        r1 = es_number_1_999(i)
+        if r1:
+            v1, i1 = r1
+            r2 = es_cte(i1, "mil")
+            if r2:
+                i2 = r2[1]
+                r3 = es_number_1_999(i2)
+                if r3:
+                    return v1 * 1000 + r3[0], r3[1]
+                return v1 * 1000, i2
+            return r1
+        return None
+
+    return es_number(i)
+
+
+def normalize_es(text, remove_articles):
+    """
+    Normalize Spanish text: spelled-out numbers become digits and,
+    on request, the definite articles in es_articles are dropped.
+    Args:
+        text (str): the string to normalize
+        remove_articles (bool): whether to drop "el", "la", "los", "las"
+    Returns:
+        (str): the normalized tokens joined by single spaces
+    """
+    tokens = text.split()  # split() also collapses repeated whitespace
+    pieces = []
+    pos = 0
+    while pos < len(tokens):
+        if remove_articles and tokens[pos] in es_articles:
+            pos += 1
+            continue
+        parsed = es_parse(tokens, pos)
+        if parsed:
+            value, pos = parsed
+            pieces.append(str(value))
+        else:
+            pieces.append(tokens[pos])
+            pos += 1
+    return " ".join(pieces)
diff --git a/mycroft/util/lang/parse_pt.py b/mycroft/util/lang/parse_pt.py
new file mode 100644
index 0000000000..7ee731ae65
--- /dev/null
+++ b/mycroft/util/lang/parse_pt.py
@@ -0,0 +1,1199 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2017 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from datetime import datetime, timedelta
+from dateutil.relativedelta import relativedelta
+from mycroft.util.lang.parse_common import *
+"""
+    Parse functions for Portuguese (PT-PT)
+
+    TODO: numbers greater than 999999
+    TODO: date time pt
+"""
+
+# Indefinite articles ["um", "uma", "uns", "umas"] can not be suppressed,
+# in PT, "um cavalo" means "a horse" or "one horse".
+pt_articles = ["o", "a", "os", "as"]
+
+pt_numbers = {
+    "zero": 0,
+    "um": 1,
+    "uma": 1,
+    "uns": 1,
+    "umas": 1,
+    "primeiro": 1,
+    "segundo": 2,
+    "terceiro": 3,
+    "dois": 2,
+    "duas": 2,
+    "tres": 3,
+    u"três": 3,
+    "quatro": 4,
+    "cinco": 5,
+    "seis": 6,
+    "sete": 7,
+    "oito": 8,
+    "nove": 9,
+    "dez": 10,
+    "onze": 11,
+    "doze": 12,
+    "treze": 13,
+    "catorze": 14,
+    "quinze": 15,
+    "dezasseis": 16,
+    "dezassete": 17,
+    "dezoito": 18,
+    "dezanove": 19,
+    "vinte": 20,
+    "trinta": 30,
+    "quarenta": 40,
+    "cinquenta": 50,
+    "sessenta": 60,
+    "setenta": 70,
+    "oitenta": 80,
+    "noventa": 90,
+    "cem": 100,
+    "cento": 100,
+    "duzentos": 200,
+    "duzentas": 200,
+    "trezentos": 300,
+    "trezentas": 300,
+    "quatrocentos": 400,
+    "quatrocentas": 400,
+    "quinhentos": 500,
+    "quinhentas": 500,
+    "seiscentos": 600,
+    "seiscentas": 600,
+    "setecentos": 700,
+    "setecentas": 700,
+    "oitocentos": 800,
+    "oitocentas": 800,
+    "novecentos": 900,
+    "novecentas": 900,
+    "mil": 1000,
+    u"milhão": 1000000}
+
+
+def isFractional_pt(input_str):
+    """
+    This function takes the given text and checks if it is a fraction.
+
+    Args:
+        input_str (str): the string to check if fractional
+    Returns:
+        (bool) or (float): False if not a fraction, otherwise the fraction
+
+    """
+    input_str = input_str.lower()  # compare case-insensitively throughout
+    if input_str.endswith('s'):
+        input_str = input_str[:-1]  # e.g. "quintos"
+    aFrac = ["meio", u"terço", "quarto", "quinto", "sexto",
+             "setimo", "oitavo", "nono", u"décimo"]
+
+    if input_str in aFrac:
+        return 1.0 / (aFrac.index(input_str) + 2)
+    if input_str == u"vigésimo":
+        return 1.0 / 20
+    if input_str == u"trigésimo":
+        return 1.0 / 30
+    if input_str == u"centésimo":
+        return 1.0 / 100
+    if input_str == u"milésimo":
+        return 1.0 / 1000
+    if (input_str == u"sétimo" or input_str == "septimo" or
+            input_str == u"séptimo"):
+        return 1.0 / 7
+
+    return False
+
+
+def extractnumber_pt(text):
+    """
+    This function extracts a number from the given Portuguese text,
+    handling number words, digits, fractions ("avos") and decimals.
+    Args:
+        text (str): the string to extract a number from
+    Returns:
+        (int) or (float): The value of extracted number, False if none
+
+    """
+    aWords = text.split()
+    count = 0
+    result = None
+    while count < len(aWords):
+        val = 0
+        word = aWords[count]
+        next_next_word = None
+        if count + 1 < len(aWords):
+            next_word = aWords[count + 1]
+            if count + 2 < len(aWords):
+                next_next_word = aWords[count + 2]
+        else:
+            next_word = None
+
+        # is current word a number?
+        if word in pt_numbers:
+            val = pt_numbers[word]
+        elif word.isdigit():  # doesn't work with decimals
+            val = int(word)
+        elif is_numeric(word):
+            val = float(word)
+        elif isFractional_pt(word):
+            if not result:
+                result = 1
+            result = result * isFractional_pt(word)
+            count += 1
+            continue
+
+        if not val:
+            # look for fractions like "2/3"
+            aPieces = word.split('/')
+            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
+            #   and is_numeric(aPieces[1])):
+            if look_for_fractions(aPieces):
+                val = float(aPieces[0]) / float(aPieces[1])
+
+        if val:
+            if result is None:
+                result = 0
+            # handle fractions
+            if next_word != "avos":
+                result += val
+            else:
+                result = float(result) / float(val)
+
+        if next_word is None:
+            break
+
+        # number word and fraction
+        ands = ["e"]
+        if next_word in ands:
+            zeros = 0
+            if result is None:
+                count += 1
+                continue
+            newWords = aWords[count + 2:]
+            newText = ""
+            for word in newWords:
+                newText += word + " "
+
+            afterAndVal = extractnumber_pt(newText[:-1])
+            if afterAndVal:
+                if result < afterAndVal or result < 20:
+                    while afterAndVal > 1:
+                        afterAndVal = afterAndVal / 10.0
+                    for word in newWords:
+                        if word == "zero" or word == "0":
+                            zeros += 1
+                        else:
+                            break
+                for i in range(0, zeros):
+                    afterAndVal = afterAndVal / 10.0
+                result += afterAndVal
+                break
+        elif next_next_word is not None:
+            if next_next_word in ands:
+                newWords = aWords[count + 3:]
+                newText = ""
+                for word in newWords:
+                    newText += word + " "
+                afterAndVal = extractnumber_pt(newText[:-1])
+                if afterAndVal:
+                    if result is None:
+                        result = 0
+                    result += afterAndVal
+                    break
+
+        decimals = ["ponto", "virgula", u"vírgula", ".", ","]
+        if next_word in decimals:
+            zeros = 0
+            newWords = aWords[count + 2:]
+            newText = ""
+            for word in newWords:
+                newText += word + " "
+            for word in newWords:
+                if word == "zero" or word == "0":
+                    zeros += 1
+                else:
+                    break
+            afterDotVal = str(extractnumber_pt(newText[:-1]))
+            afterDotVal = zeros * "0" + afterDotVal
+            result = float(str(result) + "." + afterDotVal)
+            break
+        count += 1
+
+    if result is None:
+        return False
+
+    # Return the $str with the number related words removed
+    # (now empty strings, so strlen == 0)
+    # aWords = [word for word in aWords if len(word) > 0]
+    # text = ' '.join(aWords)
+    if "." in str(result):
+        integer, dec = str(result).split(".")
+        # cast float to int
+        if dec == "0":
+            result = int(integer)
+
+    return result
+
+
+def pt_number_parse(words, i):
+    """Parse a Portuguese number (0-999999) starting at words[i].
+
+    Args:
+        words (list): tokens of the sentence being normalized
+        i (int): index of the first token to examine
+    Returns:
+        (int, int) or None: the value and the index following the last
+        consumed token, or None when words[i] does not start a number
+    """
+    def pt_cte(i, s):
+        if i < len(words) and s == words[i]:
+            return s, i + 1
+        return None
+
+    def pt_number_word(i, mi, ma):
+        if i < len(words):
+            v = pt_numbers.get(words[i])
+            # compare against None: 0 ("zero") is a valid, falsy value
+            if v is not None and mi <= v <= ma:
+                return v, i + 1
+        return None
+
+    def pt_number_1_99(i):
+        r1 = pt_number_word(i, 1, 19)
+        if r1:
+            return r1
+
+        # tens (from "vinte" up), optionally "e" <units>: "vinte e dois"
+        r1 = pt_number_word(i, 20, 90)
+        if r1:
+            v1, i1 = r1
+            r2 = pt_cte(i1, "e")
+            if r2:
+                r3 = pt_number_word(r2[1], 1, 9)
+                if r3:
+                    return v1 + r3[0], r3[1]
+            return r1
+        return None
+
+    def pt_number_1_999(i):
+        # hundreds [1-99]?
+        r1 = pt_number_word(i, 100, 900)
+        if r1:
+            v1, i1 = r1
+            r2 = pt_number_1_99(i1)
+            if r2:
+                return v1 + r2[0], r2[1]
+            return r1
+
+        # [1-99]
+        return pt_number_1_99(i)
+
+    def pt_number(i):
+        # check for zero
+        r1 = pt_number_word(i, 0, 0)
+        if r1:
+            return r1
+
+        # check for [1-999] (mil [0-999])?
+        r1 = pt_number_1_999(i)
+        if r1:
+            v1, i1 = r1
+            r2 = pt_cte(i1, "mil")
+            if r2:
+                i2 = r2[1]
+                r3 = pt_number_1_999(i2)
+                if r3:
+                    return v1 * 1000 + r3[0], r3[1]
+                return v1 * 1000, i2
+            return r1
+        return None
+
+    return pt_number(i)
+
+
+def normalize_pt(text, remove_articles):
+    """
+    PT string normalization: number words become digits, articles are
+    optionally removed and the result is passed through pt_pruning.
+    Args:
+        text (str): the string to normalize
+        remove_articles (bool): drop the articles in pt_articles; also
+            forwarded to pt_pruning as its `agressive` flag
+    Returns:
+        whatever pt_pruning returns for the normalized string
+    """
+    tokens = text.split()  # this also removes extra spaces
+    # Contractions are not common in PT
+    pieces = []
+    pos = 0
+    while pos < len(tokens):
+        token = tokens[pos]
+        if remove_articles and token in pt_articles:
+            pos += 1
+            continue
+        # Convert numbers into digits, e.g. "dois" -> "2"
+        parsed = pt_number_parse(tokens, pos)
+        if parsed:
+            value, pos = parsed
+            pieces.append(str(value))
+            continue
+        # NOTE temporary , handle some numbers above >999
+        if token in pt_numbers:
+            token = str(pt_numbers[token])
+        pieces.append(token)
+        pos += 1
+    # some articles in pt-pt can not be removed, but many words can
+    # this is experimental and some meaning may be lost
+    # maybe agressive should default to False
+    # only usage will tell, as a native speaker this seems reasonable
+    return pt_pruning(" ".join(pieces), agressive=remove_articles)
+
+
+def extract_datetime_pt(input_str, currentDate=None):
+    def clean_string(str):
+        # cleans the input string of unneeded punctuation and capitalization
+        # among other things
+        symbols = [".", ",", ";", "?", "!", u"º", u"ª"]
+        noise_words = ["o", "os", "a", "as", "do", "da", "dos", "das", "de",
+                       "ao", "aos"]
+
+        for word in symbols:
+            str = str.replace(word, "")
+        for word in noise_words:
+            str = str.replace(" " + word + " ", " ")
+        str = str.lower().replace(
+            u"á",
+            "a").replace(
+            u"ç",
+            "c").replace(
+            u"à",
+            "a").replace(
+            u"ã",
+            "a").replace(
+            u"é",
+            "e").replace(
+            u"è",
+            "e").replace(
+            u"ê",
+            "e").replace(
+            u"ó",
+            "o").replace(
+            u"ò",
+            "o").replace(
+            "-",
+            " ").replace(
+            "_",
+            "")
+        # handle synonims and equivalents, "tomorrow early = tomorrow morning
+        synonims = {"manha": ["manhazinha", "cedo", "cedinho"],
+                    "tarde": ["tardinha", "tarde"],
+                    "noite": ["noitinha", "anoitecer"],
+                    "todos": ["ao", "aos"],
+                    "em": ["do", "da", "dos", "das", "de"]}
+        for syn in synonims:
+            for word in synonims[syn]:
+                str = str.replace(" " + word + " ", " " + syn + " ")
+        # relevant plurals, cant just extract all s in pt
+        wordlist = ["manhas", "noites", "tardes", "dias", "semanas", "anos",
+                    "minutos", "segundos", "nas", "nos", "proximas",
+                    "seguintes", "horas"]
+        for idx, word in enumerate(wordlist):
+            str = str.replace(word, word.rstrip('s'))
+        str = str.replace("meses", "mes").replace("anteriores", "anterior")
+        return str
+
+    def date_found():
+        return found or \
+            (
+                datestr != "" or timeStr != "" or
+                yearOffset != 0 or monthOffset != 0 or
+                dayOffset is True or hrOffset != 0 or
+                hrAbs != 0 or minOffset != 0 or
+                minAbs != 0 or secOffset != 0
+            )
+
+    if input_str == "":
+        return None
+    if currentDate is None:
+        currentDate = datetime.now()
+
+    found = False
+    daySpecified = False
+    dayOffset = False
+    monthOffset = 0
+    yearOffset = 0
+    dateNow = currentDate
+    today = dateNow.strftime("%w")
+    currentYear = dateNow.strftime("%Y")
+    fromFlag = False
+    datestr = ""
+    hasYear = False
+    timeQualifier = ""
+
+    words = clean_string(input_str).split(" ")
+    timeQualifiersList = ['manha', 'tarde', 'noite']
+    time_indicators = ["em", "as", "nas", "pelas", "volta", "depois", "estas",
+                       "no", "dia", "hora"]
+    days = ['segunda', 'terca', 'quarta',
+            'quinta', 'sexta', 'sabado', 'domingo']
+    months = ['janeiro', 'febreiro', 'marco', 'abril', 'maio', 'junho',
+              'julho', 'agosto', 'setembro', 'outubro', 'novembro',
+              'dezembro']
+    monthsShort = ['jan', 'feb', 'mar', 'abr', 'mai', 'jun', 'jul', 'ag',
+                   'set', 'out', 'nov', 'dec']
+    nexts = ["proximo", "proxima"]
+    suffix_nexts = ["seguinte", "subsequente", "seguir"]
+    lasts = ["ultimo", "ultima"]
+    suffix_lasts = ["passada", "passado", "anterior", "antes"]
+    nxts = ["depois", "seguir", "seguida", "seguinte", "proxima", "proximo"]
+    prevs = ["antes", "ante", "previa", "previamente", "anterior"]
+    froms = ["partir", "em", "para", "na", "no", "daqui", "seguir",
+             "depois", "por", "proxima", "proximo", "da", "do", "de"]
+    thises = ["este", "esta", "deste", "desta", "neste", "nesta", "nesse",
+              "nessa"]
+    froms += thises
+    lists = nxts + prevs + froms + time_indicators
+    for idx, word in enumerate(words):
+        if word == "":
+            continue
+        wordPrevPrevPrev = words[idx - 3] if idx > 2 else ""
+        wordPrevPrev = words[idx - 2] if idx > 1 else ""
+        wordPrev = words[idx - 1] if idx > 0 else ""
+        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
+        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
+        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
+
+        start = idx
+        used = 0
+        # save timequalifier for later
+        if word in timeQualifiersList:
+            timeQualifier = word
+
+        # parse today, tomorrow, yesterday
+        elif word == "hoje" and not fromFlag:
+            dayOffset = 0
+            used += 1
+        elif word == "amanha" and not fromFlag:
+            dayOffset = 1
+            used += 1
+        elif word == "ontem" and not fromFlag:
+            dayOffset -= 1
+            used += 1
+        # "before yesterday" and "before before yesterday"
+        elif (word == "anteontem" or
+              (word == "ante" and wordNext == "ontem")) and not fromFlag:
+            dayOffset -= 2
+            used += 1
+            if wordNext == "ontem":
+                used += 1
+        elif word == "ante" and wordNext == "ante" and wordNextNext == \
+                "ontem" and not fromFlag:
+            dayOffset -= 3
+            used += 3
+        elif word == "anteanteontem" and not fromFlag:
+            dayOffset -= 3
+            used += 1
+        # day after tomorrow
+        elif word == "depois" and wordNext == "amanha" and not fromFlag:
+            dayOffset += 2
+            used = 2
+        # day before yesterday
+        elif word == "antes" and wordNext == "ontem" and not fromFlag:
+            dayOffset -= 2
+            used = 2
+        # parse 5 days, 10 weeks, last week, next week, week after
+        elif word == "dia":
+            if wordNext == "depois" or wordNext == "antes":
+                used += 1
+                if wordPrev and wordPrev[0].isdigit():
+                    dayOffset += int(wordPrev)
+                    start -= 1
+                    used += 1
+            elif (wordPrev and wordPrev[0].isdigit() and
+                    wordNext not in months and
+                    wordNext not in monthsShort):
+                dayOffset += int(wordPrev)
+                start -= 1
+                used += 2
+            elif wordNext and wordNext[0].isdigit() and wordNextNext not in \
+                    months and wordNextNext not in monthsShort:
+                dayOffset += int(wordNext)
+                start -= 1
+                used += 2
+
+        elif word == "semana" and not fromFlag:
+            if wordPrev[0].isdigit():
+                dayOffset += int(wordPrev) * 7
+                start -= 1
+                used = 2
+            for w in nexts:
+                if wordPrev == w:
+                    dayOffset = 7
+                    start -= 1
+                    used = 2
+            for w in lasts:
+                if wordPrev == w:
+                    dayOffset = -7
+                    start -= 1
+                    used = 2
+            for w in suffix_nexts:
+                if wordNext == w:
+                    dayOffset = 7
+                    start -= 1
+                    used = 2
+            for w in suffix_lasts:
+                if wordNext == w:
+                    dayOffset = -7
+                    start -= 1
+                    used = 2
+        # parse 10 months, next month, last month
+        elif word == "mes" and not fromFlag:
+            if wordPrev[0].isdigit():
+                monthOffset = int(wordPrev)
+                start -= 1
+                used = 2
+            for w in nexts:
+                if wordPrev == w:
+                    monthOffset = 7
+                    start -= 1
+                    used = 2
+            for w in lasts:
+                if wordPrev == w:
+                    monthOffset = -7
+                    start -= 1
+                    used = 2
+            for w in suffix_nexts:
+                if wordNext == w:
+                    monthOffset = 7
+                    start -= 1
+                    used = 2
+            for w in suffix_lasts:
+                if wordNext == w:
+                    monthOffset = -7
+                    start -= 1
+                    used = 2
+        # parse 5 years, next year, last year
+        elif word == "ano" and not fromFlag:
+            if wordPrev[0].isdigit():
+                yearOffset = int(wordPrev)
+                start -= 1
+                used = 2
+            for w in nexts:
+                if wordPrev == w:
+                    yearOffset = 7
+                    start -= 1
+                    used = 2
+            for w in lasts:
+                if wordPrev == w:
+                    yearOffset = -7
+                    start -= 1
+                    used = 2
+            for w in suffix_nexts:
+                if wordNext == w:
+                    yearOffset = 7
+                    start -= 1
+                    used = 2
+            for w in suffix_lasts:
+                if wordNext == w:
+                    yearOffset = -7
+                    start -= 1
+                    used = 2
+        # parse Monday, Tuesday, etc., and next Monday,
+        # last Tuesday, etc.
+        elif word in days and not fromFlag:
+
+            d = days.index(word)
+            dayOffset = (d + 1) - int(today)
+            used = 1
+            if dayOffset < 0:
+                dayOffset += 7
+            for w in nexts:
+                if wordPrev == w:
+                    dayOffset += 7
+                    used += 1
+                    start -= 1
+            for w in lasts:
+                if wordPrev == w:
+                    dayOffset -= 7
+                    used += 1
+                    start -= 1
+            for w in suffix_nexts:
+                if wordNext == w:
+                    dayOffset += 7
+                    used += 1
+                    start -= 1
+            for w in suffix_lasts:
+                if wordNext == w:
+                    dayOffset -= 7
+                    used += 1
+                    start -= 1
+            if wordNext == "feira":
+                used += 1
+        # parse 15 of July, June 20th, Feb 18, 19 of February
+        elif word in months or word in monthsShort:
+            try:
+                m = months.index(word)
+            except ValueError:
+                m = monthsShort.index(word)
+            used += 1
+            datestr = months[m]
+            if wordPrev and wordPrev[0].isdigit():
+                # 13 maio
+                datestr += " " + wordPrev
+                start -= 1
+                used += 1
+                if wordNext and wordNext[0].isdigit():
+                    datestr += " " + wordNext
+                    used += 1
+                    hasYear = True
+                else:
+                    hasYear = False
+
+            elif wordNext and wordNext[0].isdigit():
+                # maio 13
+                datestr += " " + wordNext
+                used += 1
+                if wordNextNext and wordNextNext[0].isdigit():
+                    datestr += " " + wordNextNext
+                    used += 1
+                    hasYear = True
+                else:
+                    hasYear = False
+
+            elif wordPrevPrev and wordPrevPrev[0].isdigit():
+                # 13 dia maio
+                datestr += " " + wordPrevPrev
+
+                start -= 2
+                used += 2
+                if wordNext and word[0].isdigit():
+                    datestr += " " + wordNext
+                    used += 1
+                    hasYear = True
+                else:
+                    hasYear = False
+
+            elif wordNextNext and wordNextNext[0].isdigit():
+                # maio dia 13
+                datestr += " " + wordNextNext
+                used += 2
+                if wordNextNextNext and wordNextNextNext[0].isdigit():
+                    datestr += " " + wordNextNextNext
+                    used += 1
+                    hasYear = True
+                else:
+                    hasYear = False
+
+            if datestr in months:
+                datestr = ""
+
+        # parse 5 days from tomorrow, 10 weeks from next thursday,
+        # 2 months from July
+        validFollowups = days + months + monthsShort
+        validFollowups.append("hoje")
+        validFollowups.append("amanha")
+        validFollowups.append("ontem")
+        validFollowups.append("anteontem")
+        validFollowups.append("agora")
+        validFollowups.append("ja")
+        validFollowups.append("ante")
+
+        # TODO debug word "depois" that one is failing for some reason
+        if word in froms and wordNext in validFollowups:
+
+            if not (wordNext == "amanha" and wordNext == "ontem") and not (
+                    word == "depois" or word == "antes" or word == "em"):
+                used = 2
+                fromFlag = True
+            if wordNext == "amanha" and word != "depois":
+                dayOffset += 1
+            elif wordNext == "ontem":
+                dayOffset -= 1
+            elif wordNext == "anteontem":
+                dayOffset -= 2
+            elif wordNext == "ante" and wordNextNext == "ontem":
+                dayOffset -= 2
+            elif (wordNext == "ante" and wordNext == "ante" and
+                  wordNextNextNext == "ontem"):
+                dayOffset -= 3
+            elif wordNext in days:
+                d = days.index(wordNext)
+                tmpOffset = (d + 1) - int(today)
+                used = 2
+                if wordNextNext == "feira":
+                    used += 1
+                if tmpOffset < 0:
+                    tmpOffset += 7
+                if wordNextNext:
+                    if wordNextNext in nxts:
+                        tmpOffset += 7
+                        used += 1
+                    elif wordNextNext in prevs:
+                        tmpOffset -= 7
+                        used += 1
+                dayOffset += tmpOffset
+            elif wordNextNext and wordNextNext in days:
+                d = days.index(wordNextNext)
+                tmpOffset = (d + 1) - int(today)
+                used = 3
+                if wordNextNextNext:
+                    if wordNextNextNext in nxts:
+                        tmpOffset += 7
+                        used += 1
+                    elif wordNextNextNext in prevs:
+                        tmpOffset -= 7
+                        used += 1
+                dayOffset += tmpOffset
+                if wordNextNextNext == "feira":
+                    used += 1
+        if wordNext in months:
+            used -= 1
+        if used > 0:
+
+            if start - 1 > 0 and words[start - 1] in lists:
+                start -= 1
+                used += 1
+
+            for i in range(0, used):
+                words[i + start] = ""
+
+            if (start - 1 >= 0 and words[start - 1] in lists):
+                words[start - 1] = ""
+            found = True
+            daySpecified = True
+
+    # parse time
+    timeStr = ""
+    hrOffset = 0
+    minOffset = 0
+    secOffset = 0
+    hrAbs = 0
+    minAbs = 0
+    military = False
+
+    for idx, word in enumerate(words):
+        if word == "":
+            continue
+
+        wordPrevPrev = words[idx - 2] if idx > 1 else ""
+        wordPrev = words[idx - 1] if idx > 0 else ""
+        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
+        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
+        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
+        # parse noon, midnight, morning, afternoon, evening
+        used = 0
+        if word == "meio" and wordNext == "dia":
+            hrAbs = 12
+            used += 2
+        elif word == "meia" and wordNext == "noite":
+            hrAbs = 0
+            used += 2
+        elif word == "manha":
+            if hrAbs == 0:
+                hrAbs = 8
+            used += 1
+        elif word == "tarde":
+            if hrAbs == 0:
+                hrAbs = 15
+            used += 1
+        elif word == "meio" and wordNext == "tarde":
+            if hrAbs == 0:
+                hrAbs = 17
+            used += 2
+        elif word == "meio" and wordNext == "manha":
+            if hrAbs == 0:
+                hrAbs = 10
+            used += 2
+        elif word == "fim" and wordNext == "tarde":
+            if hrAbs == 0:
+                hrAbs = 19
+            used += 2
+        elif word == "fim" and wordNext == "manha":
+            if hrAbs == 0:
+                hrAbs = 11
+            used += 2
+        elif word == "tantas" and wordNext == "manha":
+            if hrAbs == 0:
+                hrAbs = 4
+            used += 2
+        elif word == "noite":
+            if hrAbs == 0:
+                hrAbs = 22
+            used += 1
+        # parse half an hour, quarter hour
+        elif word == "hora" and \
+                (wordPrev in time_indicators or wordPrevPrev in
+                    time_indicators):
+            if wordPrev == "meia":
+                minOffset = 30
+            elif wordPrev == "quarto":
+                minOffset = 15
+            elif wordPrevPrev == "quarto":
+                minOffset = 15
+                if idx > 2 and words[idx - 3] in time_indicators:
+                    words[idx - 3] = ""
+                words[idx - 2] = ""
+            else:
+                hrOffset = 1
+            if wordPrevPrev in time_indicators:
+                words[idx - 2] = ""
+            words[idx - 1] = ""
+            used += 1
+            hrAbs = -1
+            minAbs = -1
+        # parse 5:00 am, 12:00 p.m., etc
+        elif word[0].isdigit():
+            isTime = True
+            strHH = ""
+            strMM = ""
+            remainder = ""
+            if ':' in word:
+                # parse colons
+                # "3:00 in the morning"
+                stage = 0
+                length = len(word)
+                for i in range(length):
+                    if stage == 0:
+                        if word[i].isdigit():
+                            strHH += word[i]
+                        elif word[i] == ":":
+                            stage = 1
+                        else:
+                            stage = 2
+                            i -= 1
+                    elif stage == 1:
+                        if word[i].isdigit():
+                            strMM += word[i]
+                        else:
+                            stage = 2
+                            i -= 1
+                    elif stage == 2:
+                        remainder = word[i:].replace(".", "")
+                        break
+                if remainder == "":
+                    nextWord = wordNext.replace(".", "")
+                    if nextWord == "am" or nextWord == "pm":
+                        remainder = nextWord
+                        used += 1
+                    elif wordNext == "manha":
+                        remainder = "am"
+                        used += 1
+                    elif wordNext == "tarde":
+                        remainder = "pm"
+                        used += 1
+                    elif wordNext == "noite":
+                        if 0 < int(word[0]) < 6:
+                            remainder = "am"
+                        else:
+                            remainder = "pm"
+                        used += 1
+                    elif wordNext in thises and wordNextNext == "manha":
+                        remainder = "am"
+                        used = 2
+                    elif wordNext in thises and wordNextNext == "tarde":
+                        remainder = "pm"
+                        used = 2
+                    elif wordNext in thises and wordNextNext == "noite":
+                        remainder = "pm"
+                        used = 2
+                    else:
+                        if timeQualifier != "":
+                            military = True
+                            if strHH <= 12 and \
+                                    (timeQualifier == "manha" or
+                                     timeQualifier == "tarde"):
+                                strHH += 12
+
+            else:
+                # try to parse # s without colons
+                # 5 hours, 10 minutes etc.
+                length = len(word)
+                strNum = ""
+                remainder = ""
+                for i in range(length):
+                    if word[i].isdigit():
+                        strNum += word[i]
+                    else:
+                        remainder += word[i]
+
+                if remainder == "":
+                    remainder = wordNext.replace(".", "").lstrip().rstrip()
+
+                if (
+                        remainder == "pm" or
+                        wordNext == "pm" or
+                        remainder == "p.m." or
+                        wordNext == "p.m."):
+                    strHH = strNum
+                    remainder = "pm"
+                    used = 1
+                elif (
+                        remainder == "am" or
+                        wordNext == "am" or
+                        remainder == "a.m." or
+                        wordNext == "a.m."):
+                    strHH = strNum
+                    remainder = "am"
+                    used = 1
+                else:
+                    if (wordNext == "pm" or
+                            wordNext == "p.m." or
+                            wordNext == "tarde"):
+                        strHH = strNum
+                        remainder = "pm"
+                        used = 1
+                    elif (wordNext == "am" or
+                          wordNext == "a.m." or
+                          wordNext == "manha"):
+                        strHH = strNum
+                        remainder = "am"
+                        used = 1
+                    elif (int(word) > 100 and
+                            (
+                                wordPrev == "o" or
+                                wordPrev == "oh" or
+                                wordPrev == "zero"
+                            )):
+                        # 0800 hours (pronounced oh-eight-hundred)
+                        strHH = int(word) / 100
+                        strMM = int(word) - strHH * 100
+                        military = True
+                        if wordNext == "hora":
+                            used += 1
+                    elif (
+                            wordNext == "hora" and
+                            word[0] != '0' and
+                            (
+                                int(word) < 100 and
+                                int(word) > 2400
+                            )):
+                        # ignores military time
+                        # "in 3 hours"
+                        hrOffset = int(word)
+                        used = 2
+                        isTime = False
+                        hrAbs = -1
+                        minAbs = -1
+
+                    elif wordNext == "minuto":
+                        # "in 10 minutes"
+                        minOffset = int(word)
+                        used = 2
+                        isTime = False
+                        hrAbs = -1
+                        minAbs = -1
+                    elif wordNext == "segundo":
+                        # in 5 seconds
+                        secOffset = int(word)
+                        used = 2
+                        isTime = False
+                        hrAbs = -1
+                        minAbs = -1
+                    elif int(word) > 100:
+                        strHH = int(word) / 100
+                        strMM = int(word) - strHH * 100
+                        military = True
+                        if wordNext == "hora":
+                            used += 1
+
+                    elif wordNext == "" or (
+                            wordNext == "em" and wordNextNext == "ponto"):
+                        strHH = word
+                        strMM = 00
+                        if wordNext == "em" and wordNextNext == "ponto":
+                            used += 2
+                            if wordNextNextNext == "tarde":
+                                remainder = "pm"
+                                used += 1
+                            elif wordNextNextNext == "manha":
+                                remainder = "am"
+                                used += 1
+                            elif wordNextNextNext == "noite":
+                                if 0 > strHH > 6:
+                                    remainder = "am"
+                                else:
+                                    remainder = "pm"
+                                used += 1
+
+                    elif wordNext[0].isdigit():
+                        strHH = word
+                        strMM = wordNext
+                        military = True
+                        used += 1
+                        if wordNextNext == "hora":
+                            used += 1
+                    else:
+                        isTime = False
+
+            strHH = int(strHH) if strHH else 0
+            strMM = int(strMM) if strMM else 0
+            strHH = strHH + 12 if (remainder == "pm" and
+                                   0 < strHH < 12) else strHH
+            strHH = strHH - 12 if (remainder == "am" and
+                                   0 < strHH >= 12) else strHH
+            if strHH > 24 or strMM > 59:
+                isTime = False
+                used = 0
+            if isTime:
+                hrAbs = strHH * 1
+                minAbs = strMM * 1
+                used += 1
+
+        if used > 0:
+            # removed parsed words from the sentence
+            for i in range(used):
+                words[idx + i] = ""
+
+            if wordPrev == "em" or wordPrev == "ponto":
+                words[words.index(wordPrev)] = ""
+
+            if idx > 0 and wordPrev in time_indicators:
+                words[idx - 1] = ""
+            if idx > 1 and wordPrevPrev in time_indicators:
+                words[idx - 2] = ""
+
+            idx += used - 1
+            found = True
+
+    # check that we found a date
+    if not date_found:
+        return None
+
+    if dayOffset is False:
+        dayOffset = 0
+
+    # perform date manipulation
+
+    extractedDate = dateNow
+    extractedDate = extractedDate.replace(microsecond=0,
+                                          second=0,
+                                          minute=0,
+                                          hour=0)
+    if datestr != "":
+        en_months = ['january', 'february', 'march', 'april', 'may', 'june',
+                     'july', 'august', 'september', 'october', 'november',
+                     'december']
+        en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
+                          'aug',
+                          'sept', 'oct', 'nov', 'dec']
+        for idx, en_month in enumerate(en_months):
+            datestr = datestr.replace(months[idx], en_month)
+        for idx, en_month in enumerate(en_monthsShort):
+            datestr = datestr.replace(monthsShort[idx], en_month)
+
+        temp = datetime.strptime(datestr, "%B %d")
+        if not hasYear:
+            temp = temp.replace(year=extractedDate.year)
+            if extractedDate < temp:
+                extractedDate = extractedDate.replace(year=int(currentYear),
+                                                      month=int(
+                                                          temp.strftime(
+                                                              "%m")),
+                                                      day=int(temp.strftime(
+                                                          "%d")))
+            else:
+                extractedDate = extractedDate.replace(
+                    year=int(currentYear) + 1,
+                    month=int(temp.strftime("%m")),
+                    day=int(temp.strftime("%d")))
+        else:
+            extractedDate = extractedDate.replace(
+                year=int(temp.strftime("%Y")),
+                month=int(temp.strftime("%m")),
+                day=int(temp.strftime("%d")))
+
+    if timeStr != "":
+        temp = datetime(timeStr)
+        extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
+                                              minute=temp.strftime("%M"),
+                                              second=temp.strftime("%S"))
+
+    if yearOffset != 0:
+        extractedDate = extractedDate + relativedelta(years=yearOffset)
+    if monthOffset != 0:
+        extractedDate = extractedDate + relativedelta(months=monthOffset)
+    if dayOffset != 0:
+        extractedDate = extractedDate + relativedelta(days=dayOffset)
+    if hrAbs != -1 and minAbs != -1:
+
+        extractedDate = extractedDate + relativedelta(hours=hrAbs,
+                                                      minutes=minAbs)
+        if (hrAbs != 0 or minAbs != 0) and datestr == "":
+            if not daySpecified and dateNow > extractedDate:
+                extractedDate = extractedDate + relativedelta(days=1)
+    if hrOffset != 0:
+        extractedDate = extractedDate + relativedelta(hours=hrOffset)
+    if minOffset != 0:
+        extractedDate = extractedDate + relativedelta(minutes=minOffset)
+    if secOffset != 0:
+        extractedDate = extractedDate + relativedelta(seconds=secOffset)
+
+    resultStr = " ".join(words)
+    resultStr = ' '.join(resultStr.split())
+    resultStr = pt_pruning(resultStr)
+    return [extractedDate, resultStr]
+
+
+def pt_pruning(text, symbols=True, accents=True, agressive=True):
+    """Prune Portuguese text: punctuation, accents and filler words.
+
+    symbols: delete punctuation and turn "-" / "_" into spaces.
+    accents: replace the accented letters listed below by plain ones.
+    agressive: drop the filler words below and collapse whitespace.
+    Returns the pruned text.
+    """
+    if symbols:
+        # NOTE(review): the last two entries look like mis-encoded characters;
+        # they are kept verbatim - confirm which symbols were intended.
+        for mark in [".", ",", ";", ":", "!", "?", u"ï¿½", u"ï¿½"]:
+            text = text.replace(mark, "")
+        text = text.replace("-", " ").replace("_", " ")
+    if accents:
+        plain_forms = [("a", [u"á", u"à", u"ã", u"â"]),
+                       ("e", [u"ê", u"è", u"é"]),
+                       ("i", [u"í", u"ì"]), ("o", [u"ò", u"ó"]),
+                       ("u", [u"ú", u"ù"]), ("c", [u"ç"])]
+        for plain, variants in plain_forms:
+            for accented in variants:
+                text = text.replace(accented, plain)
+    if agressive:
+        fillers = ["a", "o", "os", "as", "de", "dos", "das", "lhe", "lhes",
+                   "me", "e", "no", "nas", "na", "nos", "em", "para",
+                   "este", "esta", "deste", "desta", "neste", "nesta",
+                   "nesse", "nessa", "foi", "que"]
+        kept = [w for w in text.split(" ") if w not in fillers]
+        text = " ".join(" ".join(kept).split())
+    return text
+
+
+def get_gender_pt(word, raw_string=""):
+    """Guess a Portuguese word's gender: "f", "m", or False if unknown.
+
+    The token before `word` in raw_string is classified first; failing that,
+    the ending of `word` (trailing "s" ignored) decides. "" yields False.
+    """
+    word = word.rstrip("s")
+    words = raw_string.split(" ")
+    gender = False
+    if word in words[1:]:
+        gender = get_gender_pt(words[words.index(word, 1) - 1])
+    if not gender:  # endswith() never raises, even when word is ""
+        gender = "f" if word.endswith("a") else (
+            "m" if word.endswith(("o", "e")) else False)
+    return gender
diff --git a/mycroft/util/parse.py b/mycroft/util/parse.py
index fae84a78b0..752f44d71b 100644
--- a/mycroft/util/parse.py
+++ b/mycroft/util/parse.py
@@ -1,4 +1,4 @@
-# -*- coding: iso-8859-15 -*-
+# -*- coding: utf-8 -*-
 #
 # Copyright 2017 Mycroft AI Inc.
 #
@@ -14,10 +14,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from datetime import datetime, timedelta
-from dateutil.relativedelta import relativedelta
 from difflib import SequenceMatcher
 
+from mycroft.util.lang.parse_en import *
+from mycroft.util.lang.parse_pt import *
+from mycroft.util.lang.parse_es import *
+from mycroft.util.lang.parse_common import *
+
 
 def fuzzy_match(x, against):
     """Perform a 'fuzzy' comparison between two strings.
@@ -101,799 +104,6 @@ def extract_datetime(text, anchorDate=None, lang="en-us"):
         return extract_datetime_pt(text, anchorDate)
 
     return text
-
-
-def is_numeric(input_str):
-    """
-    Takes in a string and tests to see if it is a number.
-    Args:
-        text (str): string to test if a number
-    Returns:
-        (bool): True if a number, else False
-
-    """
-
-    try:
-        float(input_str)
-        return True
-    except ValueError:
-        return False
-
-
-def extractnumber_en(text):
-    """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
-    Args:
-        text (str): the string to normalize
-    Returns:
-        (int) or (float): The value of extracted number
-
-    """
-    aWords = text.split()
-    aWords = [word for word in aWords if word not in ["the", "a", "an"]]
-    andPass = False
-    valPreAnd = False
-    val = False
-    count = 0
-    while count < len(aWords):
-        word = aWords[count]
-        if is_numeric(word):
-            # if word.isdigit():            # doesn't work with decimals
-            val = float(word)
-        elif word == "first":
-            val = 1
-        elif word == "second":
-            val = 2
-        elif isFractional_en(word):
-            val = isFractional_en(word)
-        else:
-            if word == "one":
-                val = 1
-            elif word == "two":
-                val = 2
-            elif word == "three":
-                val = 3
-            elif word == "four":
-                val = 4
-            elif word == "five":
-                val = 5
-            elif word == "six":
-                val = 6
-            elif word == "seven":
-                val = 7
-            elif word == "eight":
-                val = 8
-            elif word == "nine":
-                val = 9
-            elif word == "ten":
-                val = 10
-            if val:
-                if count < (len(aWords) - 1):
-                    wordNext = aWords[count + 1]
-                else:
-                    wordNext = ""
-                valNext = isFractional_en(wordNext)
-
-                if valNext:
-                    val = val * valNext
-                    aWords[count + 1] = ""
-
-        # if val == False:
-        if not val:
-            # look for fractions like "2/3"
-            aPieces = word.split('/')
-            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
-            #   and is_numeric(aPieces[1])):
-            if look_for_fractions(aPieces):
-                val = float(aPieces[0]) / float(aPieces[1])
-            elif andPass:
-                # added to value, quit here
-                val = valPreAnd
-                break
-            else:
-                count += 1
-                continue
-
-        aWords[count] = ""
-
-        if (andPass):
-            aWords[count - 1] = ''  # remove "and"
-            val += valPreAnd
-        elif count + 1 < len(aWords) and aWords[count + 1] == 'and':
-            andPass = True
-            valPreAnd = val
-            val = False
-            count += 2
-            continue
-        elif count + 2 < len(aWords) and aWords[count + 2] == 'and':
-            andPass = True
-            valPreAnd = val
-            val = False
-            count += 3
-            continue
-
-        break
-
-    # if val == False:
-    if not val:
-        return False
-
-    # Return the $str with the number related words removed
-    # (now empty strings, so strlen == 0)
-    aWords = [word for word in aWords if len(word) > 0]
-    text = ' '.join(aWords)
-
-    return val
-
-
-def extract_datetime_en(str, currentDate=None):
-    def clean_string(str):
-        # cleans the input string of unneeded punctuation and capitalization
-        # among other things
-        str = str.lower().replace('?', '').replace('.', '').replace(',', '') \
-            .replace(' the ', ' ').replace(' a ', ' ').replace(' an ', ' ')
-        wordList = str.split()
-        for idx, word in enumerate(wordList):
-            word = word.replace("'s", "")
-
-            ordinals = ["rd", "st", "nd", "th"]
-            if word[0].isdigit():
-                for ord in ordinals:
-                    if ord in word:
-                        word = word.replace(ord, "")
-            wordList[idx] = word
-
-        return wordList
-
-    def date_found():
-        return found or \
-            (
-                datestr != "" or timeStr != "" or
-                yearOffset != 0 or monthOffset != 0 or
-                dayOffset is True or hrOffset != 0 or
-                hrAbs != 0 or minOffset != 0 or
-                minAbs != 0 or secOffset != 0
-            )
-
-    if str == "":
-        return None
-    if currentDate is None:
-        currentDate = datetime.now()
-
-    found = False
-    daySpecified = False
-    dayOffset = False
-    monthOffset = 0
-    yearOffset = 0
-    dateNow = currentDate
-    today = dateNow.strftime("%w")
-    currentYear = dateNow.strftime("%Y")
-    fromFlag = False
-    datestr = ""
-    hasYear = False
-    timeQualifier = ""
-
-    timeQualifiersList = ['morning', 'afternoon', 'evening']
-    markers = ['at', 'in', 'on', 'by', 'this', 'around', 'for', 'of']
-    days = ['monday', 'tuesday', 'wednesday',
-            'thursday', 'friday', 'saturday', 'sunday']
-    months = ['january', 'february', 'march', 'april', 'may', 'june',
-              'july', 'august', 'september', 'october', 'november',
-              'december']
-    monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july', 'aug',
-                   'sept', 'oct', 'nov', 'dec']
-
-    words = clean_string(str)
-
-    for idx, word in enumerate(words):
-        if word == "":
-            continue
-        wordPrevPrev = words[idx - 2] if idx > 1 else ""
-        wordPrev = words[idx - 1] if idx > 0 else ""
-        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
-        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
-
-        # this isn't in clean string because I don't want to save back to words
-        word = word.rstrip('s')
-        start = idx
-        used = 0
-        # save timequalifier for later
-        if word in timeQualifiersList:
-            timeQualifier = word
-            # parse today, tomorrow, day after tomorrow
-        elif word == "today" and not fromFlag:
-            dayOffset = 0
-            used += 1
-        elif word == "tomorrow" and not fromFlag:
-            dayOffset = 1
-            used += 1
-        elif (word == "day" and
-                wordNext == "after" and
-                wordNextNext == "tomorrow" and
-                not fromFlag and
-                not wordPrev[0].isdigit()):
-            dayOffset = 2
-            used = 3
-            if wordPrev == "the":
-                start -= 1
-                used += 1
-                # parse 5 days, 10 weeks, last week, next week
-        elif word == "day":
-            if wordPrev[0].isdigit():
-                dayOffset += int(wordPrev)
-                start -= 1
-                used = 2
-        elif word == "week" and not fromFlag:
-            if wordPrev[0].isdigit():
-                dayOffset += int(wordPrev) * 7
-                start -= 1
-                used = 2
-            elif wordPrev == "next":
-                dayOffset = 7
-                start -= 1
-                used = 2
-            elif wordPrev == "last":
-                dayOffset = -7
-                start -= 1
-                used = 2
-                # parse 10 months, next month, last month
-        elif word == "month" and not fromFlag:
-            if wordPrev[0].isdigit():
-                monthOffset = int(wordPrev)
-                start -= 1
-                used = 2
-            elif wordPrev == "next":
-                monthOffset = 1
-                start -= 1
-                used = 2
-            elif wordPrev == "last":
-                monthOffset = -1
-                start -= 1
-                used = 2
-                # parse 5 years, next year, last year
-        elif word == "year" and not fromFlag:
-            if wordPrev[0].isdigit():
-                yearOffset = int(wordPrev)
-                start -= 1
-                used = 2
-            elif wordPrev == "next":
-                yearOffset = 1
-                start -= 1
-                used = 2
-            elif wordPrev == "last":
-                yearOffset = -1
-                start -= 1
-                used = 2
-                # parse Monday, Tuesday, etc., and next Monday,
-                # last Tuesday, etc.
-        elif word in days and not fromFlag:
-            d = days.index(word)
-            dayOffset = (d + 1) - int(today)
-            used = 1
-            if dayOffset < 0:
-                dayOffset += 7
-            if wordPrev == "next":
-                dayOffset += 7
-                used += 1
-                start -= 1
-            elif wordPrev == "last":
-                dayOffset -= 7
-                used += 1
-                start -= 1
-                # parse 15 of July, June 20th, Feb 18, 19 of February
-        elif word in months or word in monthsShort and not fromFlag:
-            try:
-                m = months.index(word)
-            except ValueError:
-                m = monthsShort.index(word)
-            used += 1
-            datestr = months[m]
-            if wordPrev and (wordPrev[0].isdigit() or
-                             (wordPrev == "of" and wordPrevPrev[0].isdigit())):
-                if wordPrev == "of" and wordPrevPrev[0].isdigit():
-                    datestr += " " + words[idx - 2]
-                    used += 1
-                    start -= 1
-                else:
-                    datestr += " " + wordPrev
-                start -= 1
-                used += 1
-                if wordNext and wordNext[0].isdigit():
-                    datestr += " " + wordNext
-                    used += 1
-                    hasYear = True
-                else:
-                    hasYear = False
-
-            elif wordNext and wordNext[0].isdigit():
-                datestr += " " + wordNext
-                used += 1
-                if wordNextNext and wordNextNext[0].isdigit():
-                    datestr += " " + wordNextNext
-                    used += 1
-                    hasYear = True
-                else:
-                    hasYear = False
-        # parse 5 days from tomorrow, 10 weeks from next thursday,
-        # 2 months from July
-        validFollowups = days + months + monthsShort
-        validFollowups.append("today")
-        validFollowups.append("tomorrow")
-        validFollowups.append("next")
-        validFollowups.append("last")
-        validFollowups.append("now")
-        if (word == "from" or word == "after") and wordNext in validFollowups:
-            used = 2
-            fromFlag = True
-            if wordNext == "tomorrow":
-                dayOffset += 1
-            elif wordNext in days:
-                d = days.index(wordNext)
-                tmpOffset = (d + 1) - int(today)
-                used = 2
-                if tmpOffset < 0:
-                    tmpOffset += 7
-                dayOffset += tmpOffset
-            elif wordNextNext and wordNextNext in days:
-                d = days.index(wordNextNext)
-                tmpOffset = (d + 1) - int(today)
-                used = 3
-                if wordNext == "next":
-                    tmpOffset += 7
-                    used += 1
-                    start -= 1
-                elif wordNext == "last":
-                    tmpOffset -= 7
-                    used += 1
-                    start -= 1
-                dayOffset += tmpOffset
-        if used > 0:
-            if start - 1 > 0 and words[start - 1] == "this":
-                start -= 1
-                used += 1
-
-            for i in range(0, used):
-                words[i + start] = ""
-
-            if (start - 1 >= 0 and words[start - 1] in markers):
-                words[start - 1] = ""
-            found = True
-            daySpecified = True
-
-    # parse time
-    timeStr = ""
-    hrOffset = 0
-    minOffset = 0
-    secOffset = 0
-    hrAbs = 0
-    minAbs = 0
-    military = False
-
-    for idx, word in enumerate(words):
-        if word == "":
-            continue
-
-        wordPrevPrev = words[idx - 2] if idx > 1 else ""
-        wordPrev = words[idx - 1] if idx > 0 else ""
-        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
-        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
-        # parse noon, midnight, morning, afternoon, evening
-        used = 0
-        if word == "noon":
-            hrAbs = 12
-            used += 1
-        elif word == "midnight":
-            hrAbs = 0
-            used += 1
-        elif word == "morning":
-            if hrAbs == 0:
-                hrAbs = 8
-            used += 1
-        elif word == "afternoon":
-            if hrAbs == 0:
-                hrAbs = 15
-            used += 1
-        elif word == "evening":
-            if hrAbs == 0:
-                hrAbs = 19
-            used += 1
-            # parse half an hour, quarter hour
-        elif word == "hour" and \
-                (wordPrev in markers or wordPrevPrev in markers):
-            if wordPrev == "half":
-                minOffset = 30
-            elif wordPrev == "quarter":
-                minOffset = 15
-            elif wordPrevPrev == "quarter":
-                minOffset = 15
-                if idx > 2 and words[idx - 3] in markers:
-                    words[idx - 3] = ""
-                words[idx - 2] = ""
-            else:
-                hrOffset = 1
-            if wordPrevPrev in markers:
-                words[idx - 2] = ""
-            words[idx - 1] = ""
-            used += 1
-            hrAbs = -1
-            minAbs = -1
-            # parse 5:00 am, 12:00 p.m., etc
-        elif word[0].isdigit():
-            isTime = True
-            strHH = ""
-            strMM = ""
-            remainder = ""
-            if ':' in word:
-                # parse colons
-                # "3:00 in the morning"
-                stage = 0
-                length = len(word)
-                for i in range(length):
-                    if stage == 0:
-                        if word[i].isdigit():
-                            strHH += word[i]
-                        elif word[i] == ":":
-                            stage = 1
-                        else:
-                            stage = 2
-                            i -= 1
-                    elif stage == 1:
-                        if word[i].isdigit():
-                            strMM += word[i]
-                        else:
-                            stage = 2
-                            i -= 1
-                    elif stage == 2:
-                        remainder = word[i:].replace(".", "")
-                        break
-                if remainder == "":
-                    nextWord = wordNext.replace(".", "")
-                    if nextWord == "am" or nextWord == "pm":
-                        remainder = nextWord
-                        used += 1
-                    elif nextWord == "tonight":
-                        remainder = "pm"
-                        used += 1
-                    elif wordNext == "in" and wordNextNext == "the" and \
-                            words[idx + 3] == "morning":
-                        reaminder = "am"
-                        used += 3
-                    elif wordNext == "in" and wordNextNext == "the" and \
-                            words[idx + 3] == "afternoon":
-                        remainder = "pm"
-                        used += 3
-                    elif wordNext == "in" and wordNextNext == "the" and \
-                            words[idx + 3] == "evening":
-                        remainder = "pm"
-                        used += 3
-                    elif wordNext == "in" and wordNextNext == "morning":
-                        remainder = "am"
-                        used += 2
-                    elif wordNext == "in" and wordNextNext == "afternoon":
-                        remainder = "pm"
-                        used += 2
-                    elif wordNext == "in" and wordNextNext == "evening":
-                        remainder = "pm"
-                        used += 2
-                    elif wordNext == "this" and wordNextNext == "morning":
-                        remainder = "am"
-                        used = 2
-                    elif wordNext == "this" and wordNextNext == "afternoon":
-                        remainder = "pm"
-                        used = 2
-                    elif wordNext == "this" and wordNextNext == "evening":
-                        remainder = "pm"
-                        used = 2
-                    elif wordNext == "at" and wordNextNext == "night":
-                        if strHH > 5:
-                            remainder = "pm"
-                        else:
-                            remainder = "am"
-                        used += 2
-                    else:
-                        if timeQualifier != "":
-                            military = True
-                            if strHH <= 12 and \
-                                    (timeQualifier == "evening" or
-                                     timeQualifier == "afternoon"):
-                                strHH += 12
-            else:
-                # try to parse # s without colons
-                # 5 hours, 10 minutes etc.
-                length = len(word)
-                strNum = ""
-                remainder = ""
-                for i in range(length):
-                    if word[i].isdigit():
-                        strNum += word[i]
-                    else:
-                        remainder += word[i]
-
-                if remainder == "":
-                    remainder = wordNext.replace(".", "").lstrip().rstrip()
-
-                if (
-                        remainder == "pm" or
-                        wordNext == "pm" or
-                        remainder == "p.m." or
-                        wordNext == "p.m."):
-                    strHH = strNum
-                    remainder = "pm"
-                    used = 1
-                elif (
-                        remainder == "am" or
-                        wordNext == "am" or
-                        remainder == "a.m." or
-                        wordNext == "a.m."):
-                    strHH = strNum
-                    remainder = "am"
-                    used = 1
-                else:
-                    if wordNext == "pm" or wordNext == "p.m.":
-                        strHH = strNum
-                        reaminder = "pm"
-                        used = 1
-                    elif wordNext == "am" or wordNext == "a.m.":
-                        strHH = strNum
-                        remainder = "am"
-                        used = 1
-                    elif (
-                            int(word) > 100 and
-                            (
-                                wordPrev == "o" or
-                                wordPrev == "oh"
-                            )):
-                        # 0800 hours (pronounced oh-eight-hundred)
-                        strHH = int(word) / 100
-                        strMM = int(word) - strHH * 100
-                        military = True
-                        if wordNext == "hours":
-                            used += 1
-                    elif (
-                            wordNext == "hours" and
-                            word[0] != '0' and
-                            (
-                                int(word) < 100 and
-                                int(word) > 2400
-                            )):
-                        # ignores military time
-                        # "in 3 hours"
-                        hrOffset = int(word)
-                        used = 2
-                        isTime = False
-                        hrAbs = -1
-                        minAbs = -1
-
-                    elif wordNext == "minutes":
-                        # "in 10 minutes"
-                        minOffset = int(word)
-                        used = 2
-                        isTime = False
-                        hrAbs = -1
-                        minAbs = -1
-                    elif wordNext == "seconds":
-                        # in 5 seconds
-                        secOffset = int(word)
-                        used = 2
-                        isTime = False
-                        hrAbs = -1
-                        minAbs = -1
-                    elif int(word) > 100:
-                        strHH = int(word) / 100
-                        strMM = int(word) - strHH * 100
-                        military = True
-                        if wordNext == "hours":
-                            used += 1
-                    elif wordNext[0].isdigit():
-                        strHH = word
-                        strMM = wordNext
-                        military = True
-                        used += 1
-                        if wordNextNext == "hours":
-                            used += 1
-                    elif (
-                            wordNext == "" or wordNext == "o'clock" or
-                            (
-                                        wordNext == "in" and
-                                        (
-                                            wordNextNext == "the" or
-                                            wordNextNext == timeQualifier
-                                        )
-                            )):
-                        strHH = word
-                        strMM = 00
-                        if wordNext == "o'clock":
-                            used += 1
-                        if wordNext == "in" or wordNextNext == "in":
-                            used += (1 if wordNext == "in" else 2)
-                            if (wordNextNext and
-                                wordNextNext in timeQualifier or
-                                (words[words.index(wordNextNext) + 1] and
-                                 words[words.index(wordNextNext) + 1] in
-                                 timeQualifier)):
-                                if (wordNextNext == "afternoon" or
-                                    (len(words) >
-                                     words.index(wordNextNext) + 1 and
-                                     words[words.index(
-                                         wordNextNext) + 1] == "afternoon")):
-                                    remainder = "pm"
-                                if (wordNextNext == "evening" or
-                                    (len(words) >
-                                     (words.index(wordNextNext) + 1) and
-                                     words[words.index(
-                                         wordNextNext) + 1] == "evening")):
-                                    remainder = "pm"
-                                if (wordNextNext == "morning" or
-                                    (len(words) >
-                                     words.index(wordNextNext) + 1 and
-                                     words[words.index(
-                                         wordNextNext) + 1] == "morning")):
-                                    remainder = "am"
-                        if timeQualifier != "":
-                            military = True
-                    else:
-                        isTime = False
-
-            strHH = int(strHH) if strHH else 0
-            strMM = int(strMM) if strMM else 0
-            strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
-            strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
-            if strHH > 24 or strMM > 59:
-                isTime = False
-                used = 0
-            if isTime:
-                hrAbs = strHH * 1
-                minAbs = strMM * 1
-                used += 1
-        if used > 0:
-            # removed parsed words from the sentence
-            for i in range(used):
-                words[idx + i] = ""
-
-            if wordPrev == "o" or wordPrev == "oh":
-                words[words.index(wordPrev)] = ""
-
-            if wordPrev == "early":
-                hrOffset = -1
-                words[idx - 1] = ""
-                idx -= 1
-            elif wordPrev == "late":
-                hrOffset = 1
-                words[idx - 1] = ""
-                idx -= 1
-            if idx > 0 and wordPrev in markers:
-                words[idx - 1] = ""
-            if idx > 1 and wordPrevPrev in markers:
-                words[idx - 2] = ""
-
-            idx += used - 1
-            found = True
-
-    # check that we found a date
-    if not date_found:
-        return None
-
-    if dayOffset is False:
-        dayOffset = 0
-
-    # perform date manipulation
-
-    extractedDate = dateNow
-    extractedDate = extractedDate.replace(microsecond=0,
-                                          second=0,
-                                          minute=0,
-                                          hour=0)
-    if datestr != "":
-        temp = datetime.strptime(datestr, "%B %d")
-        if not hasYear:
-            temp = temp.replace(year=extractedDate.year)
-            if extractedDate < temp:
-                extractedDate = extractedDate.replace(year=int(currentYear),
-                                                      month=int(
-                                                          temp.strftime(
-                                                              "%m")),
-                                                      day=int(temp.strftime(
-                                                          "%d")))
-            else:
-                extractedDate = extractedDate.replace(
-                    year=int(currentYear) + 1,
-                    month=int(temp.strftime("%m")),
-                    day=int(temp.strftime("%d")))
-        else:
-            extractedDate = extractedDate.replace(
-                year=int(temp.strftime("%Y")),
-                month=int(temp.strftime("%m")),
-                day=int(temp.strftime("%d")))
-
-    if timeStr != "":
-        temp = datetime(timeStr)
-        extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
-                                              minute=temp.strftime("%M"),
-                                              second=temp.strftime("%S"))
-
-    if yearOffset != 0:
-        extractedDate = extractedDate + relativedelta(years=yearOffset)
-    if monthOffset != 0:
-        extractedDate = extractedDate + relativedelta(months=monthOffset)
-    if dayOffset != 0:
-        extractedDate = extractedDate + relativedelta(days=dayOffset)
-    if hrAbs != -1 and minAbs != -1:
-
-        extractedDate = extractedDate + relativedelta(hours=hrAbs,
-                                                      minutes=minAbs)
-        if (hrAbs != 0 or minAbs != 0) and datestr == "":
-            if not daySpecified and dateNow > extractedDate:
-                extractedDate = extractedDate + relativedelta(days=1)
-    if hrOffset != 0:
-        extractedDate = extractedDate + relativedelta(hours=hrOffset)
-    if minOffset != 0:
-        extractedDate = extractedDate + relativedelta(minutes=minOffset)
-    if secOffset != 0:
-        extractedDate = extractedDate + relativedelta(seconds=secOffset)
-    for idx, word in enumerate(words):
-        if words[idx] == "and" and words[idx - 1] == "" and words[
-                idx + 1] == "":
-            words[idx] = ""
-
-    resultStr = " ".join(words)
-    resultStr = ' '.join(resultStr.split())
-    return [extractedDate, resultStr]
-
-
-def look_for_fractions(split_list):
-    """"
-    This function takes a list made by fraction & determines if a fraction.
-
-    Args:
-        split_list (list): list created by splitting on '/'
-    Returns:
-        (bool): False if not a fraction, otherwise True
-
-    """
-
-    if len(split_list) == 2:
-        if is_numeric(split_list[0]) and is_numeric(split_list[1]):
-            return True
-
-    return False
-
-
-def isFractional_en(input_str):
-    """
-    This function takes the given text and checks if it is a fraction.
-
-    Args:
-        text (str): the string to check if fractional
-    Returns:
-        (bool) or (float): False if not a fraction, otherwise the fraction
-
-    """
-    if input_str.endswith('s', -1):
-        input_str = input_str[:len(input_str) - 1]  # e.g. "fifths"
-
-    aFrac = ["whole", "half", "third", "fourth", "fifth", "sixth",
-             "seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth"]
-
-    if input_str.lower() in aFrac:
-        return 1.0 / (aFrac.index(input_str) + 1)
-    if input_str == "quarter":
-        return 1.0 / 4
-
-    return False
-
-
-def get_gender(word, input_string="", lang="en-us"):
-    '''
-    guess gender of word, optionally use raw input text for context
-    returns "m" if the word is male, "f" if female, False if unknown
-    '''
-    if "pt" in lang or "es" in lang:
-        # spanish follows same rules
-        return get_gender_pt(word, input_string)
-    return False
-
-
 # ==============================================================
 
 
@@ -921,1431 +131,12 @@ def normalize(text, lang="en-us", remove_articles=True):
     return text
 
 
-def normalize_en(text, remove_articles):
-    """ English string normalization """
-
-    words = text.split()  # this also removed extra spaces
-    normalized = ""
-    for word in words:
-        if remove_articles and word in ["the", "a", "an"]:
-            continue
-
-        # Expand common contractions, e.g. "isn't" -> "is not"
-        contraction = ["ain't", "aren't", "can't", "could've", "couldn't",
-                       "didn't", "doesn't", "don't", "gonna", "gotta",
-                       "hadn't", "hasn't", "haven't", "he'd", "he'll", "he's",
-                       "how'd", "how'll", "how's", "I'd", "I'll", "I'm",
-                       "I've", "isn't", "it'd", "it'll", "it's", "mightn't",
-                       "might've", "mustn't", "must've", "needn't",
-                       "oughtn't",
-                       "shan't", "she'd", "she'll", "she's", "shouldn't",
-                       "should've", "somebody's", "someone'd", "someone'll",
-                       "someone's", "that'll", "that's", "that'd", "there'd",
-                       "there're", "there's", "they'd", "they'll", "they're",
-                       "they've", "wasn't", "we'd", "we'll", "we're", "we've",
-                       "weren't", "what'd", "what'll", "what're", "what's",
-                       "whats",  # technically incorrect but some STT outputs
-                       "what've", "when's", "when'd", "where'd", "where's",
-                       "where've", "who'd", "who'd've", "who'll", "who're",
-                       "who's", "who've", "why'd", "why're", "why's", "won't",
-                       "won't've", "would've", "wouldn't", "wouldn't've",
-                       "y'all", "ya'll", "you'd", "you'd've", "you'll",
-                       "y'aint", "y'ain't", "you're", "you've"]
-        if word in contraction:
-            expansion = ["is not", "are not", "can not", "could have",
-                         "could not", "did not", "does not", "do not",
-                         "going to", "got to", "had not", "has not",
-                         "have not", "he would", "he will", "he is",
-                         "how did",
-                         "how will", "how is", "I would", "I will", "I am",
-                         "I have", "is not", "it would", "it will", "it is",
-                         "might not", "might have", "must not", "must have",
-                         "need not", "ought not", "shall not", "she would",
-                         "she will", "she is", "should not", "should have",
-                         "somebody is", "someone would", "someone will",
-                         "someone is", "that will", "that is", "that would",
-                         "there would", "there are", "there is", "they would",
-                         "they will", "they are", "they have", "was not",
-                         "we would", "we will", "we are", "we have",
-                         "were not", "what did", "what will", "what are",
-                         "what is",
-                         "what is", "what have", "when is", "when did",
-                         "where did", "where is", "where have", "who would",
-                         "who would have", "who will", "who are", "who is",
-                         "who have", "why did", "why are", "why is",
-                         "will not", "will not have", "would have",
-                         "would not", "would not have", "you all", "you all",
-                         "you would", "you would have", "you will",
-                         "you are not", "you are not", "you are", "you have"]
-            word = expansion[contraction.index(word)]
-
-        # Convert numbers into digits, e.g. "two" -> "2"
-        textNumbers = ["zero", "one", "two", "three", "four", "five", "six",
-                       "seven", "eight", "nine", "ten", "eleven", "twelve",
-                       "thirteen", "fourteen", "fifteen", "sixteen",
-                       "seventeen", "eighteen", "nineteen", "twenty"]
-        if word in textNumbers:
-            word = str(textNumbers.index(word))
-
-        normalized += " " + word
-
-    return normalized[1:]  # strip the initial space
-
-
-####################################################################
-# PT-PT
-#
-# TODO: numbers greater than 999999
-# TODO: date time pt
-####################################################################
-
-# Undefined articles ["um", "uma", "uns", "umas"] can not be supressed,
-# in PT, "um cavalo" means "a horse" or "one horse".
-pt_articles = ["o", "a", "os", "as"]
-
-pt_numbers = {
-    "zero": 0,
-    "um": 1,
-    "uma": 1,
-    "uns": 1,
-    "umas": 1,
-    "primeiro": 1,
-    "segundo": 2,
-    "terceiro": 3,
-    "dois": 2,
-    "duas": 2,
-    "tres": 3,
-    u"tr�s": 3,
-    "quatro": 4,
-    "cinco": 5,
-    "seis": 6,
-    "sete": 7,
-    "oito": 8,
-    "nove": 9,
-    "dez": 10,
-    "onze": 11,
-    "doze": 12,
-    "treze": 13,
-    "catorze": 14,
-    "quinze": 15,
-    "dezasseis": 16,
-    "dezassete": 17,
-    "dezoito": 18,
-    "dezanove": 19,
-    "vinte": 20,
-    "trinta": 30,
-    "quarenta": 40,
-    "cinquenta": 50,
-    "sessenta": 60,
-    "setenta": 70,
-    "oitenta": 80,
-    "noventa": 90,
-    "cem": 100,
-    "cento": 100,
-    "duzentos": 200,
-    "duzentas": 200,
-    "trezentos": 300,
-    "trezentas": 300,
-    "quatrocentos": 400,
-    "quatrocentas": 400,
-    "quinhentos": 500,
-    "quinhentas": 500,
-    "seiscentos": 600,
-    "seiscentas": 600,
-    "setecentos": 700,
-    "setecentas": 700,
-    "oitocentos": 800,
-    "oitocentas": 800,
-    "novecentos": 900,
-    "novecentas": 900,
-    "mil": 1000,
-    u"milh�o": 1000000}
-
-
-def isFractional_pt(input_str):
-    """
-    This function takes the given text and checks if it is a fraction.
-
-    Args:
-        text (str): the string to check if fractional
-    Returns:
-        (bool) or (float): False if not a fraction, otherwise the fraction
-
-    """
-    if input_str.endswith('s', -1):
-        input_str = input_str[:len(input_str) - 1]  # e.g. "fifths"
-
-    aFrac = ["meio", u"ter�o", "quarto", "quinto", "sexto",
-             "setimo", "oitavo", "nono", u"d�cimo"]
-
-    if input_str.lower() in aFrac:
-        return 1.0 / (aFrac.index(input_str) + 2)
-    if input_str == u"vig�simo":
-        return 1.0 / 20
-    if input_str == u"trig�simo":
-        return 1.0 / 30
-    if input_str == u"cent�simo":
-        return 1.0 / 100
-    if input_str == u"mil�simo":
-        return 1.0 / 1000
-    if (input_str == u"s�timo" or input_str == "septimo" or
-            input_str == u"s�ptimo"):
-        return 1.0 / 7
-
+def get_gender(word, input_string="", lang="en-us"):
+    '''
+    Guess the gender of word, optionally using the raw input text as context.
+    Returns "m" if male, "f" if female, False if unknown or lang is not pt/es.
+    '''
+    if "pt" in lang or "es" in lang:
+        # Spanish follows the same rules, so the Portuguese helper is reused
+        return get_gender_pt(word, input_string)
     return False
-
-
-def extractnumber_pt(text):
-    """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
-    Args:
-        text (str): the string to normalize
-    Returns:
-        (int) or (float): The value of extracted number
-
-    """
-    aWords = text.split()
-    count = 0
-    result = None
-    while count < len(aWords):
-        val = 0
-        word = aWords[count]
-        next_next_word = None
-        if count + 1 < len(aWords):
-            next_word = aWords[count + 1]
-            if count + 2 < len(aWords):
-                next_next_word = aWords[count + 2]
-        else:
-            next_word = None
-
-        # is current word a number?
-        if word in pt_numbers:
-            val = pt_numbers[word]
-        elif word.isdigit():  # doesn't work with decimals
-            val = int(word)
-        elif is_numeric(word):
-            val = float(word)
-        elif isFractional_pt(word):
-            if not result:
-                result = 1
-            result = result * isFractional_pt(word)
-            count += 1
-            continue
-
-        if not val:
-            # look for fractions like "2/3"
-            aPieces = word.split('/')
-            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
-            #   and is_numeric(aPieces[1])):
-            if look_for_fractions(aPieces):
-                val = float(aPieces[0]) / float(aPieces[1])
-
-        if val:
-            if result is None:
-                result = 0
-            # handle fractions
-            if next_word != "avos":
-                result += val
-            else:
-                result = float(result) / float(val)
-
-        if next_word is None:
-            break
-
-        # number word and fraction
-        ands = ["e"]
-        if next_word in ands:
-            zeros = 0
-            if result is None:
-                count += 1
-                continue
-            newWords = aWords[count + 2:]
-            newText = ""
-            for word in newWords:
-                newText += word + " "
-
-            afterAndVal = extractnumber_pt(newText[:-1])
-            if afterAndVal:
-                if result < afterAndVal or result < 20:
-                    while afterAndVal > 1:
-                        afterAndVal = afterAndVal / 10.0
-                    for word in newWords:
-                        if word == "zero" or word == "0":
-                            zeros += 1
-                        else:
-                            break
-                for i in range(0, zeros):
-                    afterAndVal = afterAndVal / 10.0
-                result += afterAndVal
-                break
-        elif next_next_word is not None:
-            if next_next_word in ands:
-                newWords = aWords[count + 3:]
-                newText = ""
-                for word in newWords:
-                    newText += word + " "
-                afterAndVal = extractnumber_pt(newText[:-1])
-                if afterAndVal:
-                    if result is None:
-                        result = 0
-                    result += afterAndVal
-                    break
-
-        decimals = ["ponto", "virgula", u"v�rgula", ".", ","]
-        if next_word in decimals:
-            zeros = 0
-            newWords = aWords[count + 2:]
-            newText = ""
-            for word in newWords:
-                newText += word + " "
-            for word in newWords:
-                if word == "zero" or word == "0":
-                    zeros += 1
-                else:
-                    break
-            afterDotVal = str(extractnumber_pt(newText[:-1]))
-            afterDotVal = zeros * "0" + afterDotVal
-            result = float(str(result) + "." + afterDotVal)
-            break
-        count += 1
-
-    if result is None:
-        return False
-
-    # Return the $str with the number related words removed
-    # (now empty strings, so strlen == 0)
-    # aWords = [word for word in aWords if len(word) > 0]
-    # text = ' '.join(aWords)
-    if "." in str(result):
-        integer, dec = str(result).split(".")
-        # cast float to int
-        if dec == "0":
-            result = int(integer)
-
-    return result
-
-
-def pt_number_parse(words, i):
-    def pt_cte(i, s):
-        if i < len(words) and s == words[i]:
-            return s, i + 1
-        return None
-
-    def pt_number_word(i, mi, ma):
-        if i < len(words):
-            v = pt_numbers.get(words[i])
-            if v and v >= mi and v <= ma:
-                return v, i + 1
-        return None
-
-    def pt_number_1_99(i):
-        r1 = pt_number_word(i, 1, 29)
-        if r1:
-            return r1
-
-        r1 = pt_number_word(i, 30, 90)
-        if r1:
-            v1, i1 = r1
-            r2 = pt_cte(i1, "e")
-            if r2:
-                v2, i2 = r2
-                r3 = pt_number_word(i2, 1, 9)
-                if r3:
-                    v3, i3 = r3
-                    return v1 + v3, i3
-            return r1
-        return None
-
-    def pt_number_1_999(i):
-        # [2-9]cientos [1-99]?
-        r1 = pt_number_word(i, 100, 900)
-        if r1:
-            v1, i1 = r1
-            r2 = pt_number_1_99(i1)
-            if r2:
-                v2, i2 = r2
-                return v1 + v2, i2
-            else:
-                return r1
-
-        # [1-99]
-        r1 = pt_number_1_99(i)
-        if r1:
-            return r1
-
-        return None
-
-    def pt_number(i):
-        # check for cero
-        r1 = pt_number_word(i, 0, 0)
-        if r1:
-            return r1
-
-        # check for [1-999] (mil [0-999])?
-        r1 = pt_number_1_999(i)
-        if r1:
-            v1, i1 = r1
-            r2 = pt_cte(i1, "mil")
-            if r2:
-                v2, i2 = r2
-                r3 = pt_number_1_999(i2)
-                if r3:
-                    v3, i3 = r3
-                    return v1 * 1000 + v3, i3
-                else:
-                    return v1 * 1000, i2
-            else:
-                return r1
-        return None
-
-    return pt_number(i)
-
-
-def normalize_pt(text, remove_articles):
-    """ PT string normalization """
-
-    words = text.split()  # this also removed extra spaces
-    normalized = ""
-    # Contractions are not common in PT
-
-    # Convert numbers into digits, e.g. "dois" -> "2"
-    normalized = ""
-    i = 0
-    while i < len(words):
-        word = words[i]
-        # remove articles
-        if remove_articles and word in pt_articles:
-            i += 1
-            continue
-
-        # Convert numbers into digits
-        r = pt_number_parse(words, i)
-        if r:
-            v, i = r
-            normalized += " " + str(v)
-            continue
-
-        # NOTE temporary , handle some numbers above >999
-        if word in pt_numbers:
-            word = str(pt_numbers[word])
-        # end temporary
-
-        normalized += " " + word
-        i += 1
-    # some articles in pt-pt can not be removed, but many words can
-    # this is experimental and some meaning may be lost
-    # maybe agressive should default to False
-    # only usage will tell, as a native speaker this seems reasonable
-    return pt_pruning(normalized[1:], agressive=remove_articles)
-
-
-def extract_datetime_pt(input_str, currentDate=None):
-    def clean_string(str):
-        # cleans the input string of unneeded punctuation and capitalization
-        # among other things
-        symbols = [".", ",", ";", "?", "!", u"�", u"�"]
-        noise_words = ["o", "os", "a", "as", "do", "da", "dos", "das", "de",
-                       "ao", "aos"]
-
-        for word in symbols:
-            str = str.replace(word, "")
-        for word in noise_words:
-            str = str.replace(" " + word + " ", " ")
-        str = str.lower().replace(
-            u"�",
-            "a").replace(
-            u"�",
-            "c").replace(
-            u"�",
-            "a").replace(
-            u"�",
-            "a").replace(
-            u"�",
-            "e").replace(
-            u"�",
-            "e").replace(
-            u"�",
-            "e").replace(
-            u"�",
-            "o").replace(
-            u"�",
-            "o").replace(
-            "-",
-            " ").replace(
-            "_",
-            "")
-        # handle synonims and equivalents, "tomorrow early = tomorrow morning
-        synonims = {"manha": ["manhazinha", "cedo", "cedinho"],
-                    "tarde": ["tardinha", "tarde"],
-                    "noite": ["noitinha", "anoitecer"],
-                    "todos": ["ao", "aos"],
-                    "em": ["do", "da", "dos", "das", "de"]}
-        for syn in synonims:
-            for word in synonims[syn]:
-                str = str.replace(" " + word + " ", " " + syn + " ")
-        # relevant plurals, cant just extract all s in pt
-        wordlist = ["manhas", "noites", "tardes", "dias", "semanas", "anos",
-                    "minutos", "segundos", "nas", "nos", "proximas",
-                    "seguintes", "horas"]
-        for idx, word in enumerate(wordlist):
-            str = str.replace(word, word.rstrip('s'))
-        str = str.replace("meses", "mes").replace("anteriores", "anterior")
-        return str
-
-    def date_found():
-        return found or \
-            (
-                datestr != "" or timeStr != "" or
-                yearOffset != 0 or monthOffset != 0 or
-                dayOffset is True or hrOffset != 0 or
-                hrAbs != 0 or minOffset != 0 or
-                minAbs != 0 or secOffset != 0
-            )
-
-    if input_str == "":
-        return None
-    if currentDate is None:
-        currentDate = datetime.now()
-
-    found = False
-    daySpecified = False
-    dayOffset = False
-    monthOffset = 0
-    yearOffset = 0
-    dateNow = currentDate
-    today = dateNow.strftime("%w")
-    currentYear = dateNow.strftime("%Y")
-    fromFlag = False
-    datestr = ""
-    hasYear = False
-    timeQualifier = ""
-
-    words = clean_string(input_str).split(" ")
-    timeQualifiersList = ['manha', 'tarde', 'noite']
-    time_indicators = ["em", "as", "nas", "pelas", "volta", "depois", "estas",
-                       "no", "dia", "hora"]
-    days = ['segunda', 'terca', 'quarta',
-            'quinta', 'sexta', 'sabado', 'domingo']
-    months = ['janeiro', 'febreiro', 'marco', 'abril', 'maio', 'junho',
-              'julho', 'agosto', 'setembro', 'outubro', 'novembro',
-              'dezembro']
-    monthsShort = ['jan', 'feb', 'mar', 'abr', 'mai', 'jun', 'jul', 'ag',
-                   'set', 'out', 'nov', 'dec']
-    nexts = ["proximo", "proxima"]
-    suffix_nexts = ["seguinte", "subsequente", "seguir"]
-    lasts = ["ultimo", "ultima"]
-    suffix_lasts = ["passada", "passado", "anterior", "antes"]
-    nxts = ["depois", "seguir", "seguida", "seguinte", "proxima", "proximo"]
-    prevs = ["antes", "ante", "previa", "previamente", "anterior"]
-    froms = ["partir", "em", "para", "na", "no", "daqui", "seguir",
-             "depois", "por", "proxima", "proximo", "da", "do", "de"]
-    thises = ["este", "esta", "deste", "desta", "neste", "nesta", "nesse",
-              "nessa"]
-    froms += thises
-    lists = nxts + prevs + froms + time_indicators
-    for idx, word in enumerate(words):
-        if word == "":
-            continue
-        wordPrevPrevPrev = words[idx - 3] if idx > 2 else ""
-        wordPrevPrev = words[idx - 2] if idx > 1 else ""
-        wordPrev = words[idx - 1] if idx > 0 else ""
-        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
-        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
-        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
-
-        start = idx
-        used = 0
-        # save timequalifier for later
-        if word in timeQualifiersList:
-            timeQualifier = word
-
-        # parse today, tomorrow, yesterday
-        elif word == "hoje" and not fromFlag:
-            dayOffset = 0
-            used += 1
-        elif word == "amanha" and not fromFlag:
-            dayOffset = 1
-            used += 1
-        elif word == "ontem" and not fromFlag:
-            dayOffset -= 1
-            used += 1
-        # "before yesterday" and "before before yesterday"
-        elif (word == "anteontem" or
-              (word == "ante" and wordNext == "ontem")) and not fromFlag:
-            dayOffset -= 2
-            used += 1
-            if wordNext == "ontem":
-                used += 1
-        elif word == "ante" and wordNext == "ante" and wordNextNext == \
-                "ontem" and not fromFlag:
-            dayOffset -= 3
-            used += 3
-        elif word == "anteanteontem" and not fromFlag:
-            dayOffset -= 3
-            used += 1
-        # day after tomorrow
-        elif word == "depois" and wordNext == "amanha" and not fromFlag:
-            dayOffset += 2
-            used = 2
-        # day before yesterday
-        elif word == "antes" and wordNext == "ontem" and not fromFlag:
-            dayOffset -= 2
-            used = 2
-        # parse 5 days, 10 weeks, last week, next week, week after
-        elif word == "dia":
-            if wordNext == "depois" or wordNext == "antes":
-                used += 1
-                if wordPrev and wordPrev[0].isdigit():
-                    dayOffset += int(wordPrev)
-                    start -= 1
-                    used += 1
-            elif (wordPrev and wordPrev[0].isdigit() and
-                    wordNext not in months and
-                    wordNext not in monthsShort):
-                dayOffset += int(wordPrev)
-                start -= 1
-                used += 2
-            elif wordNext and wordNext[0].isdigit() and wordNextNext not in \
-                    months and wordNextNext not in monthsShort:
-                dayOffset += int(wordNext)
-                start -= 1
-                used += 2
-
-        elif word == "semana" and not fromFlag:
-            if wordPrev[0].isdigit():
-                dayOffset += int(wordPrev) * 7
-                start -= 1
-                used = 2
-            for w in nexts:
-                if wordPrev == w:
-                    dayOffset = 7
-                    start -= 1
-                    used = 2
-            for w in lasts:
-                if wordPrev == w:
-                    dayOffset = -7
-                    start -= 1
-                    used = 2
-            for w in suffix_nexts:
-                if wordNext == w:
-                    dayOffset = 7
-                    start -= 1
-                    used = 2
-            for w in suffix_lasts:
-                if wordNext == w:
-                    dayOffset = -7
-                    start -= 1
-                    used = 2
-        # parse 10 months, next month, last month
-        elif word == "mes" and not fromFlag:
-            if wordPrev[0].isdigit():
-                monthOffset = int(wordPrev)
-                start -= 1
-                used = 2
-            for w in nexts:
-                if wordPrev == w:
-                    monthOffset = 7
-                    start -= 1
-                    used = 2
-            for w in lasts:
-                if wordPrev == w:
-                    monthOffset = -7
-                    start -= 1
-                    used = 2
-            for w in suffix_nexts:
-                if wordNext == w:
-                    monthOffset = 7
-                    start -= 1
-                    used = 2
-            for w in suffix_lasts:
-                if wordNext == w:
-                    monthOffset = -7
-                    start -= 1
-                    used = 2
-        # parse 5 years, next year, last year
-        elif word == "ano" and not fromFlag:
-            if wordPrev[0].isdigit():
-                yearOffset = int(wordPrev)
-                start -= 1
-                used = 2
-            for w in nexts:
-                if wordPrev == w:
-                    yearOffset = 7
-                    start -= 1
-                    used = 2
-            for w in lasts:
-                if wordPrev == w:
-                    yearOffset = -7
-                    start -= 1
-                    used = 2
-            for w in suffix_nexts:
-                if wordNext == w:
-                    yearOffset = 7
-                    start -= 1
-                    used = 2
-            for w in suffix_lasts:
-                if wordNext == w:
-                    yearOffset = -7
-                    start -= 1
-                    used = 2
-        # parse Monday, Tuesday, etc., and next Monday,
-        # last Tuesday, etc.
-        elif word in days and not fromFlag:
-
-            d = days.index(word)
-            dayOffset = (d + 1) - int(today)
-            used = 1
-            if dayOffset < 0:
-                dayOffset += 7
-            for w in nexts:
-                if wordPrev == w:
-                    dayOffset += 7
-                    used += 1
-                    start -= 1
-            for w in lasts:
-                if wordPrev == w:
-                    dayOffset -= 7
-                    used += 1
-                    start -= 1
-            for w in suffix_nexts:
-                if wordNext == w:
-                    dayOffset += 7
-                    used += 1
-                    start -= 1
-            for w in suffix_lasts:
-                if wordNext == w:
-                    dayOffset -= 7
-                    used += 1
-                    start -= 1
-            if wordNext == "feira":
-                used += 1
-        # parse 15 of July, June 20th, Feb 18, 19 of February
-        elif word in months or word in monthsShort:
-            try:
-                m = months.index(word)
-            except ValueError:
-                m = monthsShort.index(word)
-            used += 1
-            datestr = months[m]
-            if wordPrev and wordPrev[0].isdigit():
-                # 13 maio
-                datestr += " " + wordPrev
-                start -= 1
-                used += 1
-                if wordNext and wordNext[0].isdigit():
-                    datestr += " " + wordNext
-                    used += 1
-                    hasYear = True
-                else:
-                    hasYear = False
-
-            elif wordNext and wordNext[0].isdigit():
-                # maio 13
-                datestr += " " + wordNext
-                used += 1
-                if wordNextNext and wordNextNext[0].isdigit():
-                    datestr += " " + wordNextNext
-                    used += 1
-                    hasYear = True
-                else:
-                    hasYear = False
-
-            elif wordPrevPrev and wordPrevPrev[0].isdigit():
-                # 13 dia maio
-                datestr += " " + wordPrevPrev
-
-                start -= 2
-                used += 2
-                if wordNext and word[0].isdigit():
-                    datestr += " " + wordNext
-                    used += 1
-                    hasYear = True
-                else:
-                    hasYear = False
-
-            elif wordNextNext and wordNextNext[0].isdigit():
-                # maio dia 13
-                datestr += " " + wordNextNext
-                used += 2
-                if wordNextNextNext and wordNextNextNext[0].isdigit():
-                    datestr += " " + wordNextNextNext
-                    used += 1
-                    hasYear = True
-                else:
-                    hasYear = False
-
-            if datestr in months:
-                datestr = ""
-
-        # parse 5 days from tomorrow, 10 weeks from next thursday,
-        # 2 months from July
-        validFollowups = days + months + monthsShort
-        validFollowups.append("hoje")
-        validFollowups.append("amanha")
-        validFollowups.append("ontem")
-        validFollowups.append("anteontem")
-        validFollowups.append("agora")
-        validFollowups.append("ja")
-        validFollowups.append("ante")
-
-        # TODO debug word "depois" that one is failing for some reason
-        if word in froms and wordNext in validFollowups:
-
-            if not (wordNext == "amanha" and wordNext == "ontem") and not (
-                    word == "depois" or word == "antes" or word == "em"):
-                used = 2
-                fromFlag = True
-            if wordNext == "amanha" and word != "depois":
-                dayOffset += 1
-            elif wordNext == "ontem":
-                dayOffset -= 1
-            elif wordNext == "anteontem":
-                dayOffset -= 2
-            elif wordNext == "ante" and wordNextNext == "ontem":
-                dayOffset -= 2
-            elif (wordNext == "ante" and wordNext == "ante" and
-                  wordNextNextNext == "ontem"):
-                dayOffset -= 3
-            elif wordNext in days:
-                d = days.index(wordNext)
-                tmpOffset = (d + 1) - int(today)
-                used = 2
-                if wordNextNext == "feira":
-                    used += 1
-                if tmpOffset < 0:
-                    tmpOffset += 7
-                if wordNextNext:
-                    if wordNextNext in nxts:
-                        tmpOffset += 7
-                        used += 1
-                    elif wordNextNext in prevs:
-                        tmpOffset -= 7
-                        used += 1
-                dayOffset += tmpOffset
-            elif wordNextNext and wordNextNext in days:
-                d = days.index(wordNextNext)
-                tmpOffset = (d + 1) - int(today)
-                used = 3
-                if wordNextNextNext:
-                    if wordNextNextNext in nxts:
-                        tmpOffset += 7
-                        used += 1
-                    elif wordNextNextNext in prevs:
-                        tmpOffset -= 7
-                        used += 1
-                dayOffset += tmpOffset
-                if wordNextNextNext == "feira":
-                    used += 1
-        if wordNext in months:
-            used -= 1
-        if used > 0:
-
-            if start - 1 > 0 and words[start - 1] in lists:
-                start -= 1
-                used += 1
-
-            for i in range(0, used):
-                words[i + start] = ""
-
-            if (start - 1 >= 0 and words[start - 1] in lists):
-                words[start - 1] = ""
-            found = True
-            daySpecified = True
-
-    # parse time
-    timeStr = ""
-    hrOffset = 0
-    minOffset = 0
-    secOffset = 0
-    hrAbs = 0
-    minAbs = 0
-    military = False
-
-    for idx, word in enumerate(words):
-        if word == "":
-            continue
-
-        wordPrevPrev = words[idx - 2] if idx > 1 else ""
-        wordPrev = words[idx - 1] if idx > 0 else ""
-        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
-        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
-        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
-        # parse noon, midnight, morning, afternoon, evening
-        used = 0
-        if word == "meio" and wordNext == "dia":
-            hrAbs = 12
-            used += 2
-        elif word == "meia" and wordNext == "noite":
-            hrAbs = 0
-            used += 2
-        elif word == "manha":
-            if hrAbs == 0:
-                hrAbs = 8
-            used += 1
-        elif word == "tarde":
-            if hrAbs == 0:
-                hrAbs = 15
-            used += 1
-        elif word == "meio" and wordNext == "tarde":
-            if hrAbs == 0:
-                hrAbs = 17
-            used += 2
-        elif word == "meio" and wordNext == "manha":
-            if hrAbs == 0:
-                hrAbs = 10
-            used += 2
-        elif word == "fim" and wordNext == "tarde":
-            if hrAbs == 0:
-                hrAbs = 19
-            used += 2
-        elif word == "fim" and wordNext == "manha":
-            if hrAbs == 0:
-                hrAbs = 11
-            used += 2
-        elif word == "tantas" and wordNext == "manha":
-            if hrAbs == 0:
-                hrAbs = 4
-            used += 2
-        elif word == "noite":
-            if hrAbs == 0:
-                hrAbs = 22
-            used += 1
-        # parse half an hour, quarter hour
-        elif word == "hora" and \
-                (wordPrev in time_indicators or wordPrevPrev in
-                    time_indicators):
-            if wordPrev == "meia":
-                minOffset = 30
-            elif wordPrev == "quarto":
-                minOffset = 15
-            elif wordPrevPrev == "quarto":
-                minOffset = 15
-                if idx > 2 and words[idx - 3] in time_indicators:
-                    words[idx - 3] = ""
-                words[idx - 2] = ""
-            else:
-                hrOffset = 1
-            if wordPrevPrev in time_indicators:
-                words[idx - 2] = ""
-            words[idx - 1] = ""
-            used += 1
-            hrAbs = -1
-            minAbs = -1
-        # parse 5:00 am, 12:00 p.m., etc
-        elif word[0].isdigit():
-            isTime = True
-            strHH = ""
-            strMM = ""
-            remainder = ""
-            if ':' in word:
-                # parse colons
-                # "3:00 in the morning"
-                stage = 0
-                length = len(word)
-                for i in range(length):
-                    if stage == 0:
-                        if word[i].isdigit():
-                            strHH += word[i]
-                        elif word[i] == ":":
-                            stage = 1
-                        else:
-                            stage = 2
-                            i -= 1
-                    elif stage == 1:
-                        if word[i].isdigit():
-                            strMM += word[i]
-                        else:
-                            stage = 2
-                            i -= 1
-                    elif stage == 2:
-                        remainder = word[i:].replace(".", "")
-                        break
-                if remainder == "":
-                    nextWord = wordNext.replace(".", "")
-                    if nextWord == "am" or nextWord == "pm":
-                        remainder = nextWord
-                        used += 1
-                    elif wordNext == "manha":
-                        remainder = "am"
-                        used += 1
-                    elif wordNext == "tarde":
-                        remainder = "pm"
-                        used += 1
-                    elif wordNext == "noite":
-                        if 0 < int(word[0]) < 6:
-                            remainder = "am"
-                        else:
-                            remainder = "pm"
-                        used += 1
-                    elif wordNext in thises and wordNextNext == "manha":
-                        remainder = "am"
-                        used = 2
-                    elif wordNext in thises and wordNextNext == "tarde":
-                        remainder = "pm"
-                        used = 2
-                    elif wordNext in thises and wordNextNext == "noite":
-                        remainder = "pm"
-                        used = 2
-                    else:
-                        if timeQualifier != "":
-                            military = True
-                            if strHH <= 12 and \
-                                    (timeQualifier == "manha" or
-                                     timeQualifier == "tarde"):
-                                strHH += 12
-
-            else:
-                # try to parse # s without colons
-                # 5 hours, 10 minutes etc.
-                length = len(word)
-                strNum = ""
-                remainder = ""
-                for i in range(length):
-                    if word[i].isdigit():
-                        strNum += word[i]
-                    else:
-                        remainder += word[i]
-
-                if remainder == "":
-                    remainder = wordNext.replace(".", "").lstrip().rstrip()
-
-                if (
-                        remainder == "pm" or
-                        wordNext == "pm" or
-                        remainder == "p.m." or
-                        wordNext == "p.m."):
-                    strHH = strNum
-                    remainder = "pm"
-                    used = 1
-                elif (
-                        remainder == "am" or
-                        wordNext == "am" or
-                        remainder == "a.m." or
-                        wordNext == "a.m."):
-                    strHH = strNum
-                    remainder = "am"
-                    used = 1
-                else:
-                    if (wordNext == "pm" or
-                            wordNext == "p.m." or
-                            wordNext == "tarde"):
-                        strHH = strNum
-                        remainder = "pm"
-                        used = 1
-                    elif (wordNext == "am" or
-                          wordNext == "a.m." or
-                          wordNext == "manha"):
-                        strHH = strNum
-                        remainder = "am"
-                        used = 1
-                    elif (int(word) > 100 and
-                            (
-                                wordPrev == "o" or
-                                wordPrev == "oh" or
-                                wordPrev == "zero"
-                            )):
-                        # 0800 hours (pronounced oh-eight-hundred)
-                        strHH = int(word) / 100
-                        strMM = int(word) - strHH * 100
-                        military = True
-                        if wordNext == "hora":
-                            used += 1
-                    elif (
-                            wordNext == "hora" and
-                            word[0] != '0' and
-                            (
-                                int(word) < 100 and
-                                int(word) > 2400
-                            )):
-                        # ignores military time
-                        # "in 3 hours"
-                        hrOffset = int(word)
-                        used = 2
-                        isTime = False
-                        hrAbs = -1
-                        minAbs = -1
-
-                    elif wordNext == "minuto":
-                        # "in 10 minutes"
-                        minOffset = int(word)
-                        used = 2
-                        isTime = False
-                        hrAbs = -1
-                        minAbs = -1
-                    elif wordNext == "segundo":
-                        # in 5 seconds
-                        secOffset = int(word)
-                        used = 2
-                        isTime = False
-                        hrAbs = -1
-                        minAbs = -1
-                    elif int(word) > 100:
-                        strHH = int(word) / 100
-                        strMM = int(word) - strHH * 100
-                        military = True
-                        if wordNext == "hora":
-                            used += 1
-
-                    elif wordNext == "" or (
-                            wordNext == "em" and wordNextNext == "ponto"):
-                        strHH = word
-                        strMM = 00
-                        if wordNext == "em" and wordNextNext == "ponto":
-                            used += 2
-                            if wordNextNextNext == "tarde":
-                                remainder = "pm"
-                                used += 1
-                            elif wordNextNextNext == "manha":
-                                remainder = "am"
-                                used += 1
-                            elif wordNextNextNext == "noite":
-                                if 0 > strHH > 6:
-                                    remainder = "am"
-                                else:
-                                    remainder = "pm"
-                                used += 1
-
-                    elif wordNext[0].isdigit():
-                        strHH = word
-                        strMM = wordNext
-                        military = True
-                        used += 1
-                        if wordNextNext == "hora":
-                            used += 1
-                    else:
-                        isTime = False
-
-            strHH = int(strHH) if strHH else 0
-            strMM = int(strMM) if strMM else 0
-            strHH = strHH + 12 if (remainder == "pm" and
-                                   0 < strHH < 12) else strHH
-            strHH = strHH - 12 if (remainder == "am" and
-                                   0 < strHH >= 12) else strHH
-            if strHH > 24 or strMM > 59:
-                isTime = False
-                used = 0
-            if isTime:
-                hrAbs = strHH * 1
-                minAbs = strMM * 1
-                used += 1
-
-        if used > 0:
-            # removed parsed words from the sentence
-            for i in range(used):
-                words[idx + i] = ""
-
-            if wordPrev == "em" or wordPrev == "ponto":
-                words[words.index(wordPrev)] = ""
-
-            if idx > 0 and wordPrev in time_indicators:
-                words[idx - 1] = ""
-            if idx > 1 and wordPrevPrev in time_indicators:
-                words[idx - 2] = ""
-
-            idx += used - 1
-            found = True
-
-    # check that we found a date
-    if not date_found:
-        return None
-
-    if dayOffset is False:
-        dayOffset = 0
-
-    # perform date manipulation
-
-    extractedDate = dateNow
-    extractedDate = extractedDate.replace(microsecond=0,
-                                          second=0,
-                                          minute=0,
-                                          hour=0)
-    if datestr != "":
-        en_months = ['january', 'february', 'march', 'april', 'may', 'june',
-                     'july', 'august', 'september', 'october', 'november',
-                     'december']
-        en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
-                          'aug',
-                          'sept', 'oct', 'nov', 'dec']
-        for idx, en_month in enumerate(en_months):
-            datestr = datestr.replace(months[idx], en_month)
-        for idx, en_month in enumerate(en_monthsShort):
-            datestr = datestr.replace(monthsShort[idx], en_month)
-
-        temp = datetime.strptime(datestr, "%B %d")
-        if not hasYear:
-            temp = temp.replace(year=extractedDate.year)
-            if extractedDate < temp:
-                extractedDate = extractedDate.replace(year=int(currentYear),
-                                                      month=int(
-                                                          temp.strftime(
-                                                              "%m")),
-                                                      day=int(temp.strftime(
-                                                          "%d")))
-            else:
-                extractedDate = extractedDate.replace(
-                    year=int(currentYear) + 1,
-                    month=int(temp.strftime("%m")),
-                    day=int(temp.strftime("%d")))
-        else:
-            extractedDate = extractedDate.replace(
-                year=int(temp.strftime("%Y")),
-                month=int(temp.strftime("%m")),
-                day=int(temp.strftime("%d")))
-
-    if timeStr != "":
-        temp = datetime(timeStr)
-        extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
-                                              minute=temp.strftime("%M"),
-                                              second=temp.strftime("%S"))
-
-    if yearOffset != 0:
-        extractedDate = extractedDate + relativedelta(years=yearOffset)
-    if monthOffset != 0:
-        extractedDate = extractedDate + relativedelta(months=monthOffset)
-    if dayOffset != 0:
-        extractedDate = extractedDate + relativedelta(days=dayOffset)
-    if hrAbs != -1 and minAbs != -1:
-
-        extractedDate = extractedDate + relativedelta(hours=hrAbs,
-                                                      minutes=minAbs)
-        if (hrAbs != 0 or minAbs != 0) and datestr == "":
-            if not daySpecified and dateNow > extractedDate:
-                extractedDate = extractedDate + relativedelta(days=1)
-    if hrOffset != 0:
-        extractedDate = extractedDate + relativedelta(hours=hrOffset)
-    if minOffset != 0:
-        extractedDate = extractedDate + relativedelta(minutes=minOffset)
-    if secOffset != 0:
-        extractedDate = extractedDate + relativedelta(seconds=secOffset)
-
-    resultStr = " ".join(words)
-    resultStr = ' '.join(resultStr.split())
-    resultStr = pt_pruning(resultStr)
-    return [extractedDate, resultStr]
-
-
-def pt_pruning(text, symbols=True, accents=True, agressive=True):
-    # agressive pt word pruning
-    words = ["a", "o", "os", "as", "de", "dos", "das",
-             "lhe", "lhes", "me", "e", "no", "nas", "na", "nos", "em", "para",
-             "este",
-             "esta", "deste", "desta", "neste", "nesta", "nesse",
-             "nessa", "foi", "que"]
-    if symbols:
-        symbols = [".", ",", ";", ":", "!", "?", u"�", u"�"]
-        for symbol in symbols:
-            text = text.replace(symbol, "")
-        text = text.replace("-", " ").replace("_", " ")
-    if accents:
-        accents = {"a": [u"�", u"�", u"�", u"�"],
-                   "e": [u"�", u"�", u"�"],
-                   "i": [u"�", u"�"],
-                   "o": [u"�", u"�"],
-                   "u": [u"�", u"�"],
-                   "c": [u"�"]}
-        for char in accents:
-            for acc in accents[char]:
-                text = text.replace(acc, char)
-    if agressive:
-        text_words = text.split(" ")
-        for idx, word in enumerate(text_words):
-            if word in words:
-                text_words[idx] = ""
-        text = " ".join(text_words)
-        text = ' '.join(text.split())
-    return text
-
-
-def get_gender_pt(word, raw_string=""):
-    word = word.rstrip("s")
-    gender = False
-    words = raw_string.split(" ")
-    for idx, w in enumerate(words):
-        if w == word and idx != 0:
-            previous = words[idx - 1]
-            gender = get_gender_pt(previous)
-            break
-    if not gender:
-        if word[-1] == "a":
-            gender = "f"
-        if word[-1] == "o" or word[-1] == "e":
-            gender = "m"
-    return gender
-
-
-####################################################################
-# Spanish normalization
-#
-# TODO: numbers greater than 999999
-####################################################################
-
-# Undefined articles ["un", "una", "unos", "unas"] can not be supressed,
-# in Spanish, "un caballo" means "a horse" or "one horse".
-es_articles = ["el", "la", "los", "las"]
-
-es_numbers_xlat = {
-    "un": 1,
-    "uno": 1,
-    "una": 1,
-    "dos": 2,
-    "tres": 3,
-    u"tr�s": 3,
-    "cuatro": 4,
-    "cinco": 5,
-    "seis": 6,
-    "siete": 7,
-    "ocho": 8,
-    "nueve": 9,
-    "diez": 10,
-    "once": 11,
-    "doce": 12,
-    "trece": 13,
-    "catorce": 14,
-    "quince": 15,
-    "dieciseis": 16,
-    u"diecis�is": 16,
-    "diecisiete": 17,
-    "dieciocho": 18,
-    "diecinueve": 19,
-    "veinte": 20,
-    "veintiuno": 21,
-    u"veintid�s": 22,
-    u"veintitr�s": 23,
-    "veintidos": 22,
-    "veintitres": 23,
-    "veinticuatro": 24,
-    "veinticinco": 25,
-    u"veintis�is": 26,
-    "veintiseis": 26,
-    "veintisiete": 27,
-    "veintiocho": 28,
-    "veintinueve": 29,
-    "treinta": 30,
-    "cuarenta": 40,
-    "cincuenta": 50,
-    "sesenta": 60,
-    "setenta": 70,
-    "ochenta": 80,
-    "noventa": 90,
-    "cien": 100,
-    "ciento": 100,
-    "doscientos": 200,
-    "doscientas": 200,
-    "trescientos": 300,
-    "trescientas": 300,
-    "cuatrocientos": 400,
-    "cuatrocientas": 400,
-    "quinientos": 500,
-    "quinientas": 500,
-    "seiscientos": 600,
-    "seiscientas": 600,
-    "setecientos": 700,
-    "setecientas": 700,
-    "ochocientos": 800,
-    "ochocientas": 800,
-    "novecientos": 900,
-    "novecientas": 900}
-
-
-def es_parse(words, i):
-    def es_cte(i, s):
-        if i < len(words) and s == words[i]:
-            return s, i + 1
-        return None
-
-    def es_number_word(i, mi, ma):
-        if i < len(words):
-            v = es_numbers_xlat.get(words[i])
-            if v and v >= mi and v <= ma:
-                return v, i + 1
-        return None
-
-    def es_number_1_99(i):
-        r1 = es_number_word(i, 1, 29)
-        if r1:
-            return r1
-
-        r1 = es_number_word(i, 30, 90)
-        if r1:
-            v1, i1 = r1
-            r2 = es_cte(i1, "y")
-            if r2:
-                v2, i2 = r2
-                r3 = es_number_word(i2, 1, 9)
-                if r3:
-                    v3, i3 = r3
-                    return v1 + v3, i3
-            return r1
-        return None
-
-    def es_number_1_999(i):
-        # [2-9]cientos [1-99]?
-        r1 = es_number_word(i, 100, 900)
-        if r1:
-            v1, i1 = r1
-            r2 = es_number_1_99(i1)
-            if r2:
-                v2, i2 = r2
-                return v1 + v2, i2
-            else:
-                return r1
-
-        # [1-99]
-        r1 = es_number_1_99(i)
-        if r1:
-            return r1
-
-        return None
-
-    def es_number(i):
-        # check for cero
-        r1 = es_number_word(i, 0, 0)
-        if r1:
-            return r1
-
-        # check for [1-999] (mil [0-999])?
-        r1 = es_number_1_999(i)
-        if r1:
-            v1, i1 = r1
-            r2 = es_cte(i1, "mil")
-            if r2:
-                v2, i2 = r2
-                r3 = es_number_1_999(i2)
-                if r3:
-                    v3, i3 = r3
-                    return v1 * 1000 + v3, i3
-                else:
-                    return v1 * 1000, i2
-            else:
-                return r1
-        return None
-
-    return es_number(i)
-
-
-def normalize_es(text, remove_articles):
-    """ Spanish string normalization """
-
-    words = text.split()  # this also removed extra spaces
-
-    normalized = ""
-    i = 0
-    while i < len(words):
-        word = words[i]
-
-        if remove_articles and word in es_articles:
-            i += 1
-            continue
-
-        # Convert numbers into digits
-        r = es_parse(words, i)
-        if r:
-            v, i = r
-            normalized += " " + str(v)
-            continue
-
-        normalized += " " + word
-        i += 1
-
-    return normalized[1:]  # strip the initial space
diff --git a/test/unittests/util/test_parse.py b/test/unittests/util/test_parse.py
index b301ab9af4..ba53eae436 100644
--- a/test/unittests/util/test_parse.py
+++ b/test/unittests/util/test_parse.py
@@ -375,7 +375,7 @@ class TestNormalize(unittest.TestCase):
         self.assertEqual(
             extractnumber("tres quartos de chocolate", lang="pt"),
             3.0 / 4.0)
-        self.assertEqual(extractnumber(u"tr�s quarto de chocolate",
+        self.assertEqual(extractnumber(u"tr�s quarto de chocolate",
                                        lang="pt"), 3.0 / 4.0)
         self.assertEqual(extractnumber("sete ponto cinco", lang="pt"), 7.5)
         self.assertEqual(extractnumber("sete ponto 5", lang="pt"), 7.5)
@@ -420,9 +420,9 @@ class TestNormalize(unittest.TestCase):
                          "isto e 1 teste")
 
     def test_numbers_pt(self):
-        self.assertEqual(normalize(u"isto e o um dois tr�s teste", lang="pt"),
+        self.assertEqual(normalize(u"isto e o um dois tr�s teste", lang="pt"),
                          u"isto 1 2 3 teste")
-        self.assertEqual(normalize(u"� a sete oito nove  test", lang="pt"),
+        self.assertEqual(normalize(u"� a sete oito nove  test", lang="pt"),
                          u"7 8 9 test")
         self.assertEqual(
             normalize("teste zero dez onze doze treze", lang="pt"),
@@ -459,9 +459,9 @@ class TestNormalize(unittest.TestCase):
             self.assertEqual(res[0], expected_date)
             self.assertEqual(res[1], expected_leftover)
 
-        testExtract(u"que dia � hoje",
+        testExtract(u"que dia � hoje",
                     "2017-06-27 00:00:00", u"dia")
-        testExtract(u"que dia � amanha",
+        testExtract(u"que dia � amanha",
                     "2017-06-28 00:00:00", u"dia")
         testExtract(u"que dia foi ontem",
                     "2017-06-26 00:00:00", u"dia")
@@ -513,7 +513,7 @@ class TestNormalize(unittest.TestCase):
         testExtract("lembra me para ligar a mae no dia 3 de agosto",
                     "2017-08-03 00:00:00", "lembra ligar mae")
 
-        testExtract(u"compra facas no 13� dia de maio",
+        testExtract(u"compra facas no 13� dia de maio",
                     "2018-05-13 00:00:00", "compra facas")
         testExtract(u"gasta dinheiro no maio dia 13",
                     "2018-05-13 00:00:00", "gasta dinheiro")
@@ -588,7 +588,7 @@ class TestNormalize(unittest.TestCase):
         self.assertEqual(normalize("diez once doce trece catorce quince",
                                    lang="es"),
                          "10 11 12 13 14 15")
-        self.assertEqual(normalize(u"diecis�is diecisiete", lang="es"),
+        self.assertEqual(normalize(u"diecis�is diecisiete", lang="es"),
                          "16 17")
         self.assertEqual(normalize(u"dieciocho diecinueve", lang="es"),
                          "18 19")
@@ -609,7 +609,7 @@ class TestNormalize(unittest.TestCase):
                                    lang="es"),
                          "2345")
         self.assertEqual(normalize(
-            u"ciento veintitr�s mil cuatrocientas cincuenta y seis",
+            u"ciento veintitr�s mil cuatrocientas cincuenta y seis",
             lang="es"),
             "123456")
         self.assertEqual(normalize(