mycroft-core/mycroft/util/lang/parse_it.py

# -*- coding: utf-8 -*-
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
    Parse functions for Italian (IT-IT)

    TODO: numbers greater than 999999
    TODO: it_number_parse
    TODO: it_pruning

"""

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from mycroft.util.lang.parse_common import is_numeric, look_for_fractions


# Indefinite articles ["un", "una", "un'"] cannot be suppressed:
# in Italian, "un cavallo" means both "a horse" and "one horse".
# Only the definite articles below are dropped by normalize_it().
it_articles = ["il", "lo", "la", "i", "gli", "le"]

# Italian number words mapped to their integer value.
#
# extractnumber_long_it() walks these keys in insertion order looking for a
# suffix of the word being parsed, so the order of the entries is significant:
# keep it in mind when adding or moving keys.
it_numbers = {
    "zero": 0,
    # "un", "una" and "un'" double as indefinite articles (see above)
    "un": 1,
    "uno": 1,
    "una": 1,
    "un'": 1,
    "due": 2,
    "tre": 3,
    "quattro": 4,
    "cinque": 5,
    "sei": 6,
    "sette": 7,
    "otto": 8,
    "nove": 9,
    "dieci": 10,
    "undici": 11,
    "dodici": 12,
    "tredici": 13,
    "quattordici": 14,
    "quindici": 15,
    "sedici": 16,
    "diciassette": 17,
    "diciotto": 18,
    "diciannove": 19,
    # tens: the forms without the final vowel ("vent", "quarant", ...) are
    # the prefixes left over when a compound such as "quarantuno" has its
    # unit stripped off
    "venti": 20,
    "vent": 20,
    "trenta": 30,
    "trent": 30,
    "quaranta": 40,
    "quarant": 40,
    "cinquanta": 50,
    "cinquant": 50,
    "sessanta": 60,
    "sessant": 60,
    "settanta": 70,
    "settant": 70,
    "ottanta": 80,
    "ottant": 80,
    "novanta": 90,
    "novant": 90,
    # hundreds
    "cento": 100,
    "duecento": 200,
    "trecento": 300,
    "quattrocento": 400,
    "cinquecento": 500,
    "seicento": 600,
    "settecento": 700,
    "ottocento": 800,
    "novecento": 900,
    # ordinals mapped to the corresponding cardinal value
    "primo": 1,
    "secondo": 2,
    # thousands: extractnumber_it() treats "mila" as a multiplier of the
    # value accumulated so far
    "mille": 1000,
    "mila": 1000
}


def isFractional_it(input_str):
    """
    Check whether the given word is an Italian fraction name.

    The last character of the word (the gender/number ending, as in
    "terzo", "terza", "terzi") is dropped and the remaining stem is looked
    up case-insensitively in a table of known fraction stems.

    Args:
        input_str (str): the word to check
    Returns:
        (bool) or (float): False if not a fraction, otherwise the value of
        the fraction (e.g. 1.0 / 3 for "terzo")

    TODO: verify that plurals are handled correctly
    """
    # stem -> denominator
    denominators = {
        "mezz": 2,
        "terz": 3,
        "quart": 4,
        "quint": 5,
        "sest": 6,
        "settim": 7,
        "ottav": 8,
        "non": 9,
        "decim": 10,
        "undicesim": 11,
        "dodicesim": 12,
        "tredicesim": 13,
        "quattordicesim": 14,
        "quindicesim": 15,
        "sedicesim": 16,
        "diciassettesim": 17,
        "diciottesim": 18,
        "diciannovesim": 19,
        "ventesim": 20,
        "centesim": 100,
        "millesim": 1000,
        # misspelled stems accepted by earlier versions of this function,
        # kept so that input which used to match still matches
        "quattrodicesim": 14,
        "diciasettesim": 17,
    }

    # lower-case once so that lookup and value retrieval always agree
    stem = input_str[:-1].lower()
    if stem in denominators:
        return 1.0 / denominators[stem]

    return False


def extractnumber_long_it(word):
    """
    Convert a single-word Italian number into its integer value, e.g.
        ventisette -> 27
        quarantuno -> 41
        centodiciotto -> 118
        duemila -> 2000

    The word is either a key of it_numbers or is split into a prefix and a
    suffix that are both keys of it_numbers. Every possible suffix is
    tried, so a short suffix that leaves an unknown prefix ("otto" in
    "centodiciotto") does not hide a longer valid one ("diciotto").

    Args:
        word (str): the word to convert
    Returns:
        (int): the value of the number spelled by the whole word
        False: if the word is not a number, e.g. "qualcuno"
    """
    # a plain dictionary word must not be split ("duecento" is 200, not
    # "due" + "cento")
    if word in it_numbers:
        return it_numbers[word]

    for number, value in it_numbers.items():
        if not word.endswith(number):
            continue
        prefix = word[:len(word) - len(number)]
        if prefix not in it_numbers:
            # e.g. "qualc" + "uno": try the next candidate suffix
            continue
        if number == "mila":
            # "mila" is the plural of "mille": it multiplies the prefix
            return it_numbers[prefix] * value
        return it_numbers[prefix] + value

    return False


def extractnumber_it(text):
    """
    Extract a number from the given text, turning spelled-out numbers into
    an integer or a float.

    Adapted from the Portuguese implementation.

    Handled forms (one space separated token at a time):
      * words found in it_numbers, with "mila" multiplying what precedes it
      * digit strings and anything accepted by is_numeric()
      * fraction names recognised by isFractional_it() ("un terzo" is 1/3,
        while "il terzo" is read as the ordinal 3)
      * "a/b" fractions
      * single-word compounds understood by extractnumber_long_it()
      * "<number> e <number>" and "<number> virgola|punto <number>"

    Args:
        text (str): the string to parse
    Returns:
        (int) or (float): the value of the extracted number, or False when
        no number is found
    """
    aWords = text.split()
    count = 0
    result = None
    ands = ["e"]
    decimals = ["punto", "virgola", ".", ","]
    while count < len(aWords):
        val = 0
        word = aWords[count]
        next_word = None
        next_next_word = None
        if count + 1 < len(aWords):
            next_word = aWords[count + 1]
            if count + 2 < len(aWords):
                next_next_word = aWords[count + 2]

        # is current word a number?
        if word in it_numbers:
            if word == "mila":
                # multiply what has been read so far; a bare "mila" with
                # nothing before it counts as one thousand
                multiplier = 1 if result is None else result
                val = multiplier * it_numbers[word]
                result = 0
            else:
                val = it_numbers[word]

        elif word.isdigit():  # doesn't work with decimals
            val = int(word)
        elif is_numeric(word):
            val = float(word)
        else:
            fraction = isFractional_it(word)
            if fraction:
                if not result:
                    result = 1
                result = result * fraction
                # "un terzo" is 1/3 but "il terzo" is 3
                if count > 0 and aWords[count - 1] == "il":
                    # round() rather than floor division: 1.0 // 0.2 is 4.0
                    result = float(round(1.0 / fraction))

                count += 1
                continue

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces) and float(aPieces[1]) != 0:
                val = float(aPieces[0]) / float(aPieces[1])

        if not val:
            # look for compound numbers such as ventuno, ventitre, centoventi
            val = extractnumber_long_it(word)

        if val:
            if result is None:
                result = 0
            result += val

        if next_word is None:
            break

        # number word and fraction
        if next_word in ands:
            zeros = 0
            if result is None:
                count += 1
                continue
            newWords = aWords[count + 2:]
            afterAndVal = extractnumber_it(" ".join(newWords))
            if afterAndVal:
                if result < afterAndVal or result < 20:
                    # what follows "e" is read as the decimal part
                    while afterAndVal > 1:
                        afterAndVal = afterAndVal / 10.0
                    for token in newWords:
                        if token == "zero" or token == "0":
                            zeros += 1
                        else:
                            break
                for _ in range(0, zeros):
                    afterAndVal = afterAndVal / 10.0
                result += afterAndVal
                break
        elif next_next_word is not None:
            if next_next_word in ands:
                newWords = aWords[count + 3:]
                afterAndVal = extractnumber_it(" ".join(newWords))
                if afterAndVal:
                    if result is None:
                        result = 0
                    result += afterAndVal
                    break

        # a decimal separator only makes sense once an integer part exists
        if next_word in decimals and result is not None:
            zeros = 0
            newWords = aWords[count + 2:]
            for token in newWords:
                if token == "zero" or token == "0":
                    zeros += 1
                else:
                    break
            afterDotVal = extractnumber_it(" ".join(newWords))
            # glue "<integer>.<digits>" together only when both sides are
            # whole numbers; otherwise keep the integer part as it is
            if (afterDotVal is not False and afterDotVal >= 0 and
                    float(afterDotVal).is_integer() and
                    float(result).is_integer()):
                result = float(str(int(result)) + "." + zeros * "0" +
                               str(int(afterDotVal)))
            break
        count += 1

    if result is None:
        return False

    # cast floats with no decimal part (e.g. 3.0) to int
    if "." in str(result):
        integer, dec = str(result).split(".")
        if dec == "0":
            result = int(integer)

    return result


def normalize_it(text, remove_articles):
    """
    Normalize an Italian string.

    Extra whitespace is collapsed, definite articles are optionally
    dropped and number words are replaced by digits
    (e.g. "quarantadue" -> "42"). Indefinite articles are never removed
    because in Italian they are also the number one.

    Args:
        text (str): the string to normalize
        remove_articles (bool): whether the words listed in it_articles
            are dropped
    Returns:
        (str): the normalized string
    """
    kept = []
    # split() with no argument also swallows repeated spaces
    for token in text.split():
        if remove_articles and token in it_articles:
            continue

        # plain number word -> digits
        if token in it_numbers:
            token = str(it_numbers[token])

        # single-word compound number -> digits
        compound = extractnumber_long_it(token)
        if compound:
            token = str(compound)

        kept.append(token)

    return " ".join(kept)


def extract_datetime_it(string, currentDate=None):
    """
    Extract a date and/or time reference from an Italian sentence.

    The sentence is expected to be already normalized (numbers written as
    digits). It is lower-cased, stripped of punctuation and accents, split
    into words and scanned twice: first for dates ("domani",
    "lunedi prossimo", "15 luglio 2019", "3 giorni", ...), then for times
    ("8:30", "1530", "5 pm", "mezzogiorno", "10 minuti", ...). Every word
    that was understood is removed from the sentence.

    Absolute and relative times are both applied to midnight of the
    reference day.

    Args:
        string (str): the sentence to parse
        currentDate (datetime): the reference "now"; defaults to
            datetime.now()
    Returns:
        (list) or None: [datetime, str] holding the extracted date and the
        remaining words of the sentence, or None when the sentence is
        empty or holds no date/time information
    Raises:
        ValueError: if the sentence names an impossible calendar date
            (e.g. "30 febbraio")
    """
    def clean_string(s):
        """
            Strip punctuation, capitalization, accents and noise words
            from the input string and normalize Italian plurals.
            Returns the list of remaining words.
        """
        symbols = [".", ",", ";", "?", "!", u"º", u"ª", u"°"]
        for symbol in symbols:
            s = s.replace(symbol, "")

        s = s.lower()
        replacements = [(u"á", "a"), (u"à", "a"), (u"è", "e'"),
                        (u"é", "e'"), (u"ì", "i"), (u"ù", "u"),
                        (u"ò", "o"), ("-", " "), ("_", "")]
        for old, new in replacements:
            s = s.replace(old, new)

        noise_words = ["tra", "la", "del", "al", "il", "di",
                       "le", "per", "alle", "alla", "dai", "delle",
                       "a", "e'", "era", "questa", "questo", "e"]
        for noise in noise_words:
            s = s.replace(" " + noise + " ", " ")

        # normalize plurals/variants to simplify the analysis; done word by
        # word so that unrelated words containing the same letters
        # ("colore", "anniversario") are left alone
        singular = {
            "secondi": "secondo",
            "minuti": "minuto",
            "ore": "ora",
            "giorni": "giorno",
            "settimane": "settimana",
            "mesi": "mese",
            "anni": "anno",
            "mattino": "mattina",
            "prossima": "prossimo",
            "questa": "questo",
            "quarti": "quarto"}

        return [singular.get(token, token) for token in s.split()]

    def to_int(token):
        """ Return token as int when it is a plain digit string, else None """
        if not token.isdigit():
            return None
        try:
            return int(token)
        except ValueError:
            return None

    def as_day(token):
        """ Return token as a day of the month (1-31), else None """
        value = to_int(token)
        if value is not None and 1 <= value <= 31:
            return value
        return None

    def as_year(token):
        """ Return token as a four digit year, else None """
        value = to_int(token)
        if value is not None and len(token) == 4 and value >= 1:
            return value
        return None

    def blank(first, count):
        """ Empty count words starting at first, staying inside the list """
        for pos in range(max(first, 0), min(first + count, len(words))):
            words[pos] = ""

    def date_found():
        return found or \
            (
                dateMonth is not None or
                yearOffset != 0 or monthOffset != 0 or
                hrOffset != 0 or hrAbs != 0 or
                minOffset != 0 or minAbs != 0 or
                secOffset != 0
            )

    if string == "":
        return None
    if currentDate is None:
        currentDate = datetime.now()

    found = False
    daySpecified = False
    dayOffset = False
    monthOffset = 0
    yearOffset = 0
    dateNow = currentDate
    today = int(dateNow.strftime("%w"))
    currentYear = dateNow.year
    fromFlag = False
    # explicit calendar date ("15 luglio 2019"); dateMonth is 1-12
    dateMonth = None
    dateDay = None
    dateYear = None
    timeQualifier = ""

    timeQualifiersList = ['mattina', 'pomeriggio', 'sera']
    markers = ['alle', 'in', 'questo',  'per', 'di']
    days = ['lunedi', 'martedi', 'mercoledi',
            'giovedi', 'venerdi', 'sabato', 'domenica']
    months = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno',
              'luglio', 'agosto', 'settembre', 'ottobre', 'novembre',
              'dicembre']
    monthsShort = ['gen', 'feb', 'mar', 'apr', 'mag', 'giu', 'lug', 'ago',
                   'set', 'ott', 'nov', 'dic']
    # words that may follow "da"/"dopo" in "5 giorni da domani"
    validFollowups = days + months + monthsShort + \
        ["oggi", "domani", "prossimo", "passato", "ora"]

    words = clean_string(string)

    # parse date
    for idx, word in enumerate(words):
        if word == "":
            continue
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        start = idx
        used = 0
        # save timequalifier for later
        if word in timeQualifiersList:
            timeQualifier = word
        # parse today, tomorrow, day after tomorrow
        elif word == "oggi" and not fromFlag:
            dayOffset = 0
            used += 1
        elif word == "domani" and not fromFlag:
            dayOffset = 1
            used += 1
        elif word == "ieri" and not fromFlag:
            dayOffset -= 1
            used += 1
        elif word == "dopodomani" and not fromFlag:  # after tomorrow
            dayOffset += 2
            used += 1
        elif word == "dopo" and wordNext == "domani" and \
                not fromFlag:
            # the second day is added by the "da"/"dopo" handling below
            dayOffset += 1
            used += 2
        # parse 5 days, 10 weeks, next week, last week
        elif word == "giorno":
            amount = to_int(wordPrev)
            if amount is not None:
                dayOffset += amount
                start -= 1
                used = 2
                if wordNext == "dopo" and wordNextNext == "domani":
                    dayOffset += 1
                    used += 2
        elif word == "settimana" and not fromFlag:
            amount = to_int(wordPrev)
            if amount is not None:
                dayOffset += amount * 7
                start -= 1
                used = 2
            elif wordPrev == "prossimo":
                dayOffset = 7
                start -= 1
                used = 2
            elif wordPrev == "passato":
                dayOffset = -7
                start -= 1
                used = 2
        # parse 10 months, next month, last month
        elif word == "mese" and not fromFlag:
            amount = to_int(wordPrev)
            if amount is not None:
                monthOffset = amount
                start -= 1
                used = 2
            elif wordPrev == "prossimo":
                monthOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "passato":
                monthOffset = -1
                start -= 1
                used = 2
        # parse 5 years, next year, last year
        elif word == "anno" and not fromFlag:
            amount = to_int(wordPrev)
            if amount is not None:
                yearOffset = amount
                start -= 1
                used = 2
            elif wordPrev == "prossimo":
                yearOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "passato":
                yearOffset = -1
                start -= 1
                used = 2
        # parse Monday, Tuesday, etc., and next Monday,
        # last Tuesday, etc.
        elif word in days and not fromFlag:
            d = days.index(word)
            dayOffset = (d + 1) - today
            used = 1
            if dayOffset < 0:
                dayOffset += 7
            if wordPrev == "prossimo":
                dayOffset += 7
                used += 1
                start -= 1
            elif wordPrev == "passato":
                dayOffset -= 7
                used += 1
                start -= 1
            # a trailing "prossimo"/"passato" is consumed without
            # changing the offset
            if wordNext == "prossimo":
                used += 1
            elif wordNext == "passato":
                used += 1
        # parse 15 of July, June 20th, Feb 18, 19 of February
        elif word in months or (word in monthsShort and not fromFlag):
            if word in months:
                m = months.index(word)
            else:
                m = monthsShort.index(word)
            used += 1
            dateMonth = m + 1
            dateDay = None
            dateYear = None
            day = as_day(wordPrev)
            if day is not None:
                # "15 luglio" / "15 luglio 2019"
                dateDay = day
                start -= 1
                used += 1
                year = as_year(wordNext)
                if year is not None:
                    dateYear = year
                    used += 1
            else:
                day = as_day(wordNext)
                if day is not None:
                    # "luglio 15" / "luglio 15 2019"
                    dateDay = day
                    used += 1
                    year = as_year(wordNextNext)
                    if year is not None:
                        dateYear = year
                        used += 1
                else:
                    # "luglio 2019": first day of that month
                    year = as_year(wordNext)
                    if year is not None:
                        dateYear = year
                        used += 1
        # parse 5 days from tomorrow, 10 weeks from next thursday,
        # 2 months from July
        if (word == "da" or word == "dopo") and wordNext in validFollowups:
            used = 2
            fromFlag = True
            if wordNext == "domani":
                dayOffset += 1
            elif wordNext in days:
                d = days.index(wordNext)
                tmpOffset = (d + 1) - today
                used = 2
                if tmpOffset < 0:
                    tmpOffset += 7
                dayOffset += tmpOffset
            elif wordNextNext and wordNextNext in days:
                d = days.index(wordNextNext)
                tmpOffset = (d + 1) - today
                used = 3
                if wordNext == "prossimo":
                    tmpOffset += 7
                    used += 2
                    start -= 1
                elif wordNext == "passato":
                    tmpOffset -= 7
                    used += 1
                    start -= 1
                dayOffset += tmpOffset
        if used > 0:
            if start - 1 > 0 and words[start - 1] == "questo":
                start -= 1
                used += 1

            blank(start, used)

            if start - 1 >= 0 and words[start - 1] in markers:
                words[start - 1] = ""
            found = True
            daySpecified = True

    # parse time
    hrOffset = 0
    minOffset = 0
    secOffset = 0
    hrAbs = 0
    minAbs = 0

    for idx, word in enumerate(words):
        if word == "":
            continue

        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        # parse noon, midnight, morning, afternoon, evening
        used = 0
        if word == "mezzogiorno":
            hrAbs = 12
            used += 1
        elif word == "mezzanotte":
            hrAbs = 24
            used += 1
        elif word == "mezzo" and wordNext == "giorno":
            # if stt splits the word
            hrAbs = 12
            used += 2
        elif word == "mezza" and wordNext == "notte":
            # if stt splits the word
            hrAbs = 24
            used += 2
        elif word == "mattina":
            if hrAbs == 0:
                hrAbs = 8
            used += 1
            hour = to_int(wordNext)  # mattina alle 5
            if hour is not None:
                hrAbs = hour
                used += 1
        elif word == "pomeriggio":
            if hrAbs == 0:
                hrAbs = 15
            used += 1
            hour = to_int(wordNext)  # pomeriggio alle 5
            if hour is not None:
                hrAbs = hour
                used += 1
                if hrAbs < 12:
                    hrAbs += 12
        elif word == "sera":
            if hrAbs == 0:
                hrAbs = 19
            used += 1
            hour = to_int(wordNext)  # sera alle 8
            if hour is not None:
                hrAbs = hour
                used += 1
                if hrAbs < 12:
                    hrAbs += 12

        # parse 5:00 am, 12:00 p.m., etc
        elif word[0].isdigit():
            isTime = True
            hour = 0
            minute = 0
            remainder = ""
            if ':' in word:
                # parse colons
                # "3:00 in the morning"
                pos = 0
                while pos < len(word) and word[pos].isdigit():
                    pos += 1
                hour = int(word[:pos])
                if pos < len(word) and word[pos] == ":":
                    minuteStart = pos + 1
                    pos = minuteStart
                    while pos < len(word) and word[pos].isdigit():
                        pos += 1
                    if pos > minuteStart:
                        minute = int(word[minuteStart:pos])
                # whatever follows the digits, e.g. the "pm" of "5:30pm"
                remainder = word[pos:].replace(".", "")

                if remainder == "":
                    nextWord = wordNext.replace(".", "")
                    if nextWord == "am" or nextWord == "pm":
                        remainder = nextWord
                        used += 1
                    elif nextWord == "sera":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "mattina":
                        remainder = "am"
                        used += 1
                    elif wordNext == "pomeriggio":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "notte":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "di" and wordNextNext == "notte":
                        if hour > 5:
                            remainder = "pm"
                        else:
                            remainder = "am"
                        used += 2
                    # a previously seen time qualifier ("sera",
                    # "pomeriggio") is applied to hrAbs further down

            else:
                # try to parse # s without colons
                # 5 hours, 10 minutes etc.
                strNum = "".join(ch for ch in word if ch.isdigit())
                glued = "".join(ch for ch in word if not ch.isdigit())
                remainder = glued
                if remainder == "":
                    remainder = wordNext.replace(".", "").strip()

                number = to_int(word)
                if (
                        remainder == "pm" or
                        wordNext == "pm" or
                        remainder == "p.m." or
                        wordNext == "p.m."):
                    hour = int(strNum)
                    remainder = "pm"
                    # only consume the next word when it is the marker
                    if glued == "" or wordNext in ("pm", "p.m."):
                        used = 1
                elif (
                        remainder == "am" or
                        wordNext == "am" or
                        remainder == "a.m." or
                        wordNext == "a.m."):
                    hour = int(strNum)
                    remainder = "am"
                    if glued == "" or wordNext in ("am", "a.m."):
                        used = 1
                elif number is None:
                    # digits mixed with something else ("3d", "2/3")
                    isTime = False
                elif number > 100 and \
                        (wordPrev == "o" or wordPrev == "oh"):
                    # 0800 hours (pronounced oh-eight-hundred)
                    hour, minute = divmod(number, 100)
                    if wordNext == "ora":
                        used += 1
                elif (
                        wordNext == "ora" and
                        word[0] != '0' and
                        (number < 100 or number > 2400)):
                    # ignores military time
                    # "in 3 hours"
                    hrOffset = number
                    used = 2
                    isTime = False
                    hrAbs = -1
                    minAbs = -1
                elif wordNext == "mattina":
                    # "11 del mattina": "del" has already been removed
                    used = 2
                    isTime = False
                    hrAbs = number
                    minAbs = 0
                elif wordNext == "pomeriggio":
                    # "2 del pomeriggio": "del" has already been removed
                    used = 2
                    isTime = False
                    hrAbs = number + 12 if number < 12 else number
                    minAbs = 0
                elif wordNext == "sera":
                    # "alle 8 di sera": "di" has already been removed
                    used = 2
                    isTime = False
                    hrAbs = number + 12 if number < 12 else number
                    minAbs = 0
                # parse half an hour : undici e mezza
                elif wordNext == "mezza":
                    hrAbs = number
                    minAbs = 30
                    used = 2
                    isTime = False
                # parse 1 quarter hour 3 quarters : dieci e tre quarti
                elif wordNext == "quarto":
                    minAbs = 15 * number
                    used = 2
                    if minAbs > 45:  # discard nonsensical values
                        minAbs = 0
                        used -= 2
                    isTime = False
                elif wordNext == "minuto":
                    # "in 10 minutes"
                    minOffset = number
                    used = 2
                    isTime = False
                    hrAbs = -1
                    minAbs = -1
                elif wordNext == "secondo":
                    # in 5 seconds
                    secOffset = number
                    used = 2
                    isTime = False
                    hrAbs = -1
                    minAbs = -1
                elif number > 100:
                    # military time, e.g. 1530
                    hour, minute = divmod(number, 100)
                    if wordNext == "ora":
                        used += 1
                elif to_int(wordNext) is not None:
                    # two numbers in a row are read as hours and minutes
                    hour = number
                    minute = to_int(wordNext)
                    used += 1
                    if wordNextNext == "ora":
                        used += 1
                elif wordNext == "in" and wordNextNext == "punto":
                    hour = number
                    minute = 0
                    used += 2
                else:
                    isTime = False

            if remainder == "pm" and hour < 12:
                hour += 12
            if remainder == "am" and hour >= 12:
                hour -= 12
            if hour > 24 or minute > 59:
                isTime = False
                used = 0
            if isTime:
                hrAbs = hour
                minAbs = minute
                used += 1

            # apply "sera"/"pomeriggio" seen elsewhere in the sentence, but
            # only when this word was actually consumed as a time
            if used > 0 and hrAbs <= 12 and \
                    (timeQualifier == "sera" or
                     timeQualifier == "pomeriggio"):
                hrAbs += 12

        if used > 0:
            # removed parsed words from the sentence
            blank(idx, used)

            if wordPrev == "o" or wordPrev == "oh":
                words[idx - 1] = ""

            if wordPrev == "presto":
                hrOffset = -1
                words[idx - 1] = ""
            elif wordPrev == "tardi":
                hrOffset = 1
                words[idx - 1] = ""
            # wordPrev/wordPrevPrev are "" at the start of the sentence,
            # so these lookups never go out of range
            if wordPrev in markers:
                words[idx - 1] = ""
            if wordPrevPrev in markers:
                words[idx - 2] = ""

            found = True

    # check that we found a date
    if not date_found():
        return None

    if dayOffset is False:
        dayOffset = 0

    # perform date manipulation

    extractedDate = dateNow.replace(microsecond=0,
                                    second=0,
                                    minute=0,
                                    hour=0)

    if dateMonth is not None:
        day = dateDay if dateDay is not None else 1
        if dateYear is None:
            # no year given: pick the next occurrence of that date
            temp = extractedDate.replace(month=dateMonth, day=day)
            if extractedDate < temp:
                extractedDate = temp
            else:
                extractedDate = extractedDate.replace(
                    year=currentYear + 1,
                    month=dateMonth,
                    day=day)
        else:
            extractedDate = extractedDate.replace(
                year=dateYear,
                month=dateMonth,
                day=day)

    # month and year shifts need calendar arithmetic; everything else is a
    # fixed-length offset
    if yearOffset != 0:
        extractedDate = extractedDate + relativedelta(years=yearOffset)
    if monthOffset != 0:
        extractedDate = extractedDate + relativedelta(months=monthOffset)
    if dayOffset != 0:
        extractedDate = extractedDate + timedelta(days=dayOffset)
    if hrAbs != -1 and minAbs != -1:

        extractedDate = extractedDate + timedelta(hours=hrAbs,
                                                  minutes=minAbs)
        if (hrAbs != 0 or minAbs != 0) and dateMonth is None:
            # a bare time that has already passed today means tomorrow
            if not daySpecified and dateNow > extractedDate:
                extractedDate = extractedDate + timedelta(days=1)
    if hrOffset != 0:
        extractedDate = extractedDate + timedelta(hours=hrOffset)
    if minOffset != 0:
        extractedDate = extractedDate + timedelta(minutes=minOffset)
    if secOffset != 0:
        extractedDate = extractedDate + timedelta(seconds=secOffset)

    # drop an "e" left between two removed words (or at the edge of the
    # sentence next to a removed word)
    for idx, word in enumerate(words):
        if word != "e":
            continue
        before = words[idx - 1] if idx > 0 else ""
        after = words[idx + 1] if idx + 1 < len(words) else ""
        if before == "" and after == "":
            words[idx] = ""

    resultStr = " ".join(words)
    resultStr = ' '.join(resultStr.split())
    return [extractedDate, resultStr]


def get_gender_it(word, raw_string=""):
    """
    Guess the grammatical gender of an Italian word.

    When raw_string contains the word preceded by another word (typically
    its article), the gender is first guessed from that preceding word.
    If that gives no answer, the last letter of the word itself is used:
    "a"/"e" -> feminine, "o"/"n"/"l"/"i" -> masculine.

    This may be of limited use: in Italian the gender is defined by the
    article rather than by the ending of the word, and other functions of
    this module remove the articles from the sentence.

    Args:
        word (str): the word whose gender is wanted
        raw_string (str): optional sentence containing the word
    Returns:
        (str) or (bool): "f", "m", or False when the gender is unknown

    TODO: verify whether this is useful
    """

    gender = False
    # split() without argument copes with repeated spaces, which would
    # otherwise yield empty "words"
    words = raw_string.split()
    for idx, w in enumerate(words):
        if w == word and idx != 0:
            gender = get_gender_it(words[idx - 1])
            break

    # an empty word has no last letter to look at
    if not gender and word:
        ending = word[-1]
        if ending in ("a", "e"):
            gender = "f"
        elif ending in ("o", "n", "l", "i"):
            gender = "m"

    return gender