fix extract date (#1651)

* Fix errors when the space between a number and its unit is missing (e.g. "3pm", "5seconds")
* Fix relative times: "in 15 minutes" is now computed from the current time, not from midnight

Resolves #1650
pull/1668/head
JarbasAI 2018-06-26 10:45:39 +01:00 committed by Åke
parent cf847d1bf2
commit fa4173a2d3
2 changed files with 68 additions and 30 deletions

View File

@ -292,7 +292,8 @@ def extract_datetime_en(string, currentDate=None):
ordinals = ["rd", "st", "nd", "th"]
if word[0].isdigit():
for ordinal in ordinals:
if ordinal in word:
# "second" is the only case we should not do this
if ordinal in word and "second" not in word:
word = word.replace(ordinal, "")
wordList[idx] = word
@ -664,7 +665,6 @@ def extract_datetime_en(string, currentDate=None):
if remainder == "":
remainder = wordNext.replace(".", "").lstrip().rstrip()
if (
remainder == "pm" or
wordNext == "pm" or
@ -682,67 +682,64 @@ def extract_datetime_en(string, currentDate=None):
remainder = "am"
used = 1
else:
if wordNext == "pm" or wordNext == "p.m.":
strHH = strNum
remainder = "pm"
used = 1
elif wordNext == "am" or wordNext == "a.m.":
strHH = strNum
remainder = "am"
used = 1
elif (
int(word) > 100 and
if (
int(strNum) > 100 and
(
wordPrev == "o" or
wordPrev == "oh"
)):
# 0800 hours (pronounced oh-eight-hundred)
strHH = int(word) / 100
strMM = int(word) - strHH * 100
strHH = int(strNum) / 100
strMM = int(strNum) - strHH * 100
military = True
if wordNext == "hours":
used += 1
elif (
wordNext == "hours" and
(wordNext == "hours" or wordNext == "hour" or
remainder == "hours" or remainder == "hour") and
word[0] != '0' and
(
int(word) < 100 and
int(word) > 2400
int(strNum) < 100 or
int(strNum) > 2400
)):
# ignores military time
# "in 3 hours"
hrOffset = int(word)
hrOffset = int(strNum)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "minutes":
elif wordNext == "minutes" or wordNext == "minute" or \
remainder == "minutes" or remainder == "minute":
# "in 10 minutes"
minOffset = int(word)
minOffset = int(strNum)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "seconds":
elif wordNext == "seconds" or wordNext == "second" \
or remainder == "seconds" or remainder == "second":
# in 5 seconds
secOffset = int(word)
secOffset = int(strNum)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif int(word) > 100:
strHH = int(word) / 100
strMM = int(word) - strHH * 100
elif int(strNum) > 100:
strHH = int(strNum) / 100
strMM = int(strNum) - strHH * 100
military = True
if wordNext == "hours":
if wordNext == "hours" or wordNext == "hour" or \
remainder == "hours" or remainder == "hour":
used += 1
elif wordNext[0].isdigit():
strHH = word
elif wordNext and wordNext[0].isdigit():
strHH = strNum
strMM = wordNext
military = True
used += 1
if wordNextNext == "hours":
if wordNext == "hours" or wordNext == "hour" or \
remainder == "hours" or remainder == "hour":
used += 1
elif (
wordNext == "" or wordNext == "o'clock" or
@ -753,7 +750,7 @@ def extract_datetime_en(string, currentDate=None):
wordNextNext == timeQualifier
)
)):
strHH = word
strHH = strNum
strMM = 00
if wordNext == "o'clock":
used += 1
@ -788,6 +785,12 @@ def extract_datetime_en(string, currentDate=None):
else:
isTime = False
# keep current date
if not military and remainder not in ["pm", "am", "o'clock"]:
hrOffset = hrOffset + int(dateNow.strftime("%H"))
minOffset = minOffset + int(dateNow.strftime("%M"))
secOffset = secOffset + int(dateNow.strftime("%S"))
strHH = int(strHH) if strHH else 0
strMM = int(strMM) if strMM else 0
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
@ -802,6 +805,8 @@ def extract_datetime_en(string, currentDate=None):
if used > 0:
# removed parsed words from the sentence
for i in range(used):
if idx + i >= len(words):
break
words[idx + i] = ""
if wordPrev == "o" or wordPrev == "oh":

View File

@ -185,6 +185,39 @@ class TestNormalize(unittest.TestCase):
"2017-12-03 00:00:00", "")
testExtract("lets meet at 8:00 tonight",
"2017-06-27 20:00:00", "lets meet")
testExtract("lets meet at 5pm",
"2017-06-27 17:00:00", "lets meet")
def test_extract_relativedatetime_en(self):
    """Check relative phrases ("in N <unit>") against a fixed anchor time.

    Every utterance is handed to extract_datetime together with the anchor
    2017-06-27 10:00; the returned datetime (formatted as
    "%Y-%m-%d %H:%M:%S") and the returned leftover text are both compared
    with the expected values.
    """
    anchor = datetime(2017, 6, 27, 10, 0)
    stamp_format = "%Y-%m-%d %H:%M:%S"

    # (utterance, expected formatted datetime, expected leftover text)
    cases = (
        ("lets meet in 5 minutes", "2017-06-27 10:05:00", "lets meet"),
        ("lets meet in 5minutes", "2017-06-27 10:05:00", "lets meet"),
        ("lets meet in 5 seconds", "2017-06-27 10:00:05", "lets meet"),
        ("lets meet in 1 hour", "2017-06-27 11:00:00", "lets meet"),
        ("lets meet in 2 hours", "2017-06-27 12:00:00", "lets meet"),
        ("lets meet in 2hours", "2017-06-27 12:00:00", "lets meet"),
        ("lets meet in 1 minute", "2017-06-27 10:01:00", "lets meet"),
        ("lets meet in 1 second", "2017-06-27 10:00:01", "lets meet"),
        ("lets meet in 5seconds", "2017-06-27 10:00:05", "lets meet"),
    )

    for utterance, expected_date, expected_leftover in cases:
        when, remaining = extract_datetime(utterance, anchor)
        self.assertEqual(when.strftime(stamp_format), expected_date)
        self.assertEqual(remaining, expected_leftover)
def test_spaces(self):
self.assertEqual(normalize(" this is a test"),