fix extract date (#1651)
* Fix errors when spaces are missing "3pm" "5seconds" * Fix relative times "in 15 minutes" is now from current time not from midnight Resolves #1650pull/1668/head
parent
cf847d1bf2
commit
fa4173a2d3
|
@ -292,7 +292,8 @@ def extract_datetime_en(string, currentDate=None):
|
|||
ordinals = ["rd", "st", "nd", "th"]
|
||||
if word[0].isdigit():
|
||||
for ordinal in ordinals:
|
||||
if ordinal in word:
|
||||
# "second" is the only case we should not do this
|
||||
if ordinal in word and "second" not in word:
|
||||
word = word.replace(ordinal, "")
|
||||
wordList[idx] = word
|
||||
|
||||
|
@ -664,7 +665,6 @@ def extract_datetime_en(string, currentDate=None):
|
|||
|
||||
if remainder == "":
|
||||
remainder = wordNext.replace(".", "").lstrip().rstrip()
|
||||
|
||||
if (
|
||||
remainder == "pm" or
|
||||
wordNext == "pm" or
|
||||
|
@ -682,67 +682,64 @@ def extract_datetime_en(string, currentDate=None):
|
|||
remainder = "am"
|
||||
used = 1
|
||||
else:
|
||||
if wordNext == "pm" or wordNext == "p.m.":
|
||||
strHH = strNum
|
||||
remainder = "pm"
|
||||
used = 1
|
||||
elif wordNext == "am" or wordNext == "a.m.":
|
||||
strHH = strNum
|
||||
remainder = "am"
|
||||
used = 1
|
||||
elif (
|
||||
int(word) > 100 and
|
||||
if (
|
||||
int(strNum) > 100 and
|
||||
(
|
||||
wordPrev == "o" or
|
||||
wordPrev == "oh"
|
||||
)):
|
||||
# 0800 hours (pronounced oh-eight-hundred)
|
||||
strHH = int(word) / 100
|
||||
strMM = int(word) - strHH * 100
|
||||
strHH = int(strNum) / 100
|
||||
strMM = int(strNum) - strHH * 100
|
||||
military = True
|
||||
if wordNext == "hours":
|
||||
used += 1
|
||||
elif (
|
||||
wordNext == "hours" and
|
||||
(wordNext == "hours" or wordNext == "hour" or
|
||||
remainder == "hours" or remainder == "hour") and
|
||||
word[0] != '0' and
|
||||
(
|
||||
int(word) < 100 and
|
||||
int(word) > 2400
|
||||
int(strNum) < 100 or
|
||||
int(strNum) > 2400
|
||||
)):
|
||||
# ignores military time
|
||||
# "in 3 hours"
|
||||
hrOffset = int(word)
|
||||
hrOffset = int(strNum)
|
||||
used = 2
|
||||
isTime = False
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
|
||||
elif wordNext == "minutes":
|
||||
elif wordNext == "minutes" or wordNext == "minute" or \
|
||||
remainder == "minutes" or remainder == "minute":
|
||||
# "in 10 minutes"
|
||||
minOffset = int(word)
|
||||
minOffset = int(strNum)
|
||||
used = 2
|
||||
isTime = False
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
elif wordNext == "seconds":
|
||||
elif wordNext == "seconds" or wordNext == "second" \
|
||||
or remainder == "seconds" or remainder == "second":
|
||||
# in 5 seconds
|
||||
secOffset = int(word)
|
||||
secOffset = int(strNum)
|
||||
used = 2
|
||||
isTime = False
|
||||
hrAbs = -1
|
||||
minAbs = -1
|
||||
elif int(word) > 100:
|
||||
strHH = int(word) / 100
|
||||
strMM = int(word) - strHH * 100
|
||||
elif int(strNum) > 100:
|
||||
strHH = int(strNum) / 100
|
||||
strMM = int(strNum) - strHH * 100
|
||||
military = True
|
||||
if wordNext == "hours":
|
||||
if wordNext == "hours" or wordNext == "hour" or \
|
||||
remainder == "hours" or remainder == "hour":
|
||||
used += 1
|
||||
elif wordNext[0].isdigit():
|
||||
strHH = word
|
||||
elif wordNext and wordNext[0].isdigit():
|
||||
strHH = strNum
|
||||
strMM = wordNext
|
||||
military = True
|
||||
used += 1
|
||||
if wordNextNext == "hours":
|
||||
if wordNext == "hours" or wordNext == "hour" or \
|
||||
remainder == "hours" or remainder == "hour":
|
||||
used += 1
|
||||
elif (
|
||||
wordNext == "" or wordNext == "o'clock" or
|
||||
|
@ -753,7 +750,7 @@ def extract_datetime_en(string, currentDate=None):
|
|||
wordNextNext == timeQualifier
|
||||
)
|
||||
)):
|
||||
strHH = word
|
||||
strHH = strNum
|
||||
strMM = 00
|
||||
if wordNext == "o'clock":
|
||||
used += 1
|
||||
|
@ -788,6 +785,12 @@ def extract_datetime_en(string, currentDate=None):
|
|||
else:
|
||||
isTime = False
|
||||
|
||||
# keep current date
|
||||
if not military and remainder not in ["pm", "am", "o'clock"]:
|
||||
hrOffset = hrOffset + int(dateNow.strftime("%H"))
|
||||
minOffset = minOffset + int(dateNow.strftime("%M"))
|
||||
secOffset = secOffset + int(dateNow.strftime("%S"))
|
||||
|
||||
strHH = int(strHH) if strHH else 0
|
||||
strMM = int(strMM) if strMM else 0
|
||||
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
|
||||
|
@ -802,6 +805,8 @@ def extract_datetime_en(string, currentDate=None):
|
|||
if used > 0:
|
||||
# removed parsed words from the sentence
|
||||
for i in range(used):
|
||||
if idx + i >= len(words):
|
||||
break
|
||||
words[idx + i] = ""
|
||||
|
||||
if wordPrev == "o" or wordPrev == "oh":
|
||||
|
|
|
@ -185,6 +185,39 @@ class TestNormalize(unittest.TestCase):
|
|||
"2017-12-03 00:00:00", "")
|
||||
testExtract("lets meet at 8:00 tonight",
|
||||
"2017-06-27 20:00:00", "lets meet")
|
||||
testExtract("lets meet at 5pm",
|
||||
"2017-06-27 17:00:00", "lets meet")
|
||||
|
||||
def test_extract_relativedatetime_en(self):
|
||||
def extractWithFormat(text):
|
||||
date = datetime(2017, 6, 27, 10, 0)
|
||||
[extractedDate, leftover] = extract_datetime(text, date)
|
||||
extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S")
|
||||
return [extractedDate, leftover]
|
||||
|
||||
def testExtract(text, expected_date, expected_leftover):
|
||||
res = extractWithFormat(text)
|
||||
self.assertEqual(res[0], expected_date)
|
||||
self.assertEqual(res[1], expected_leftover)
|
||||
|
||||
testExtract("lets meet in 5 minutes",
|
||||
"2017-06-27 10:05:00", "lets meet")
|
||||
testExtract("lets meet in 5minutes",
|
||||
"2017-06-27 10:05:00", "lets meet")
|
||||
testExtract("lets meet in 5 seconds",
|
||||
"2017-06-27 10:00:05", "lets meet")
|
||||
testExtract("lets meet in 1 hour",
|
||||
"2017-06-27 11:00:00", "lets meet")
|
||||
testExtract("lets meet in 2 hours",
|
||||
"2017-06-27 12:00:00", "lets meet")
|
||||
testExtract("lets meet in 2hours",
|
||||
"2017-06-27 12:00:00", "lets meet")
|
||||
testExtract("lets meet in 1 minute",
|
||||
"2017-06-27 10:01:00", "lets meet")
|
||||
testExtract("lets meet in 1 second",
|
||||
"2017-06-27 10:00:01", "lets meet")
|
||||
testExtract("lets meet in 5seconds",
|
||||
"2017-06-27 10:00:05", "lets meet")
|
||||
|
||||
def test_spaces(self):
|
||||
self.assertEqual(normalize(" this is a test"),
|
||||
|
|
Loading…
Reference in New Issue