fix extract date (#1651)
* Fix errors when the space between a number and its unit is missing ("3pm", "5seconds").
* Fix relative times: "in 15 minutes" is now computed from the current time, not from midnight.
Resolves #1650
pull/1668/head
parent
cf847d1bf2
commit
fa4173a2d3
|
@ -292,7 +292,8 @@ def extract_datetime_en(string, currentDate=None):
|
||||||
ordinals = ["rd", "st", "nd", "th"]
|
ordinals = ["rd", "st", "nd", "th"]
|
||||||
if word[0].isdigit():
|
if word[0].isdigit():
|
||||||
for ordinal in ordinals:
|
for ordinal in ordinals:
|
||||||
if ordinal in word:
|
# "second" is the only case we should not do this
|
||||||
|
if ordinal in word and "second" not in word:
|
||||||
word = word.replace(ordinal, "")
|
word = word.replace(ordinal, "")
|
||||||
wordList[idx] = word
|
wordList[idx] = word
|
||||||
|
|
||||||
|
@ -664,7 +665,6 @@ def extract_datetime_en(string, currentDate=None):
|
||||||
|
|
||||||
if remainder == "":
|
if remainder == "":
|
||||||
remainder = wordNext.replace(".", "").lstrip().rstrip()
|
remainder = wordNext.replace(".", "").lstrip().rstrip()
|
||||||
|
|
||||||
if (
|
if (
|
||||||
remainder == "pm" or
|
remainder == "pm" or
|
||||||
wordNext == "pm" or
|
wordNext == "pm" or
|
||||||
|
@ -682,67 +682,64 @@ def extract_datetime_en(string, currentDate=None):
|
||||||
remainder = "am"
|
remainder = "am"
|
||||||
used = 1
|
used = 1
|
||||||
else:
|
else:
|
||||||
if wordNext == "pm" or wordNext == "p.m.":
|
if (
|
||||||
strHH = strNum
|
int(strNum) > 100 and
|
||||||
remainder = "pm"
|
|
||||||
used = 1
|
|
||||||
elif wordNext == "am" or wordNext == "a.m.":
|
|
||||||
strHH = strNum
|
|
||||||
remainder = "am"
|
|
||||||
used = 1
|
|
||||||
elif (
|
|
||||||
int(word) > 100 and
|
|
||||||
(
|
(
|
||||||
wordPrev == "o" or
|
wordPrev == "o" or
|
||||||
wordPrev == "oh"
|
wordPrev == "oh"
|
||||||
)):
|
)):
|
||||||
# 0800 hours (pronounced oh-eight-hundred)
|
# 0800 hours (pronounced oh-eight-hundred)
|
||||||
strHH = int(word) / 100
|
strHH = int(strNum) / 100
|
||||||
strMM = int(word) - strHH * 100
|
strMM = int(strNum) - strHH * 100
|
||||||
military = True
|
military = True
|
||||||
if wordNext == "hours":
|
if wordNext == "hours":
|
||||||
used += 1
|
used += 1
|
||||||
elif (
|
elif (
|
||||||
wordNext == "hours" and
|
(wordNext == "hours" or wordNext == "hour" or
|
||||||
|
remainder == "hours" or remainder == "hour") and
|
||||||
word[0] != '0' and
|
word[0] != '0' and
|
||||||
(
|
(
|
||||||
int(word) < 100 and
|
int(strNum) < 100 or
|
||||||
int(word) > 2400
|
int(strNum) > 2400
|
||||||
)):
|
)):
|
||||||
# ignores military time
|
# ignores military time
|
||||||
# "in 3 hours"
|
# "in 3 hours"
|
||||||
hrOffset = int(word)
|
hrOffset = int(strNum)
|
||||||
used = 2
|
used = 2
|
||||||
isTime = False
|
isTime = False
|
||||||
hrAbs = -1
|
hrAbs = -1
|
||||||
minAbs = -1
|
minAbs = -1
|
||||||
|
|
||||||
elif wordNext == "minutes":
|
elif wordNext == "minutes" or wordNext == "minute" or \
|
||||||
|
remainder == "minutes" or remainder == "minute":
|
||||||
# "in 10 minutes"
|
# "in 10 minutes"
|
||||||
minOffset = int(word)
|
minOffset = int(strNum)
|
||||||
used = 2
|
used = 2
|
||||||
isTime = False
|
isTime = False
|
||||||
hrAbs = -1
|
hrAbs = -1
|
||||||
minAbs = -1
|
minAbs = -1
|
||||||
elif wordNext == "seconds":
|
elif wordNext == "seconds" or wordNext == "second" \
|
||||||
|
or remainder == "seconds" or remainder == "second":
|
||||||
# in 5 seconds
|
# in 5 seconds
|
||||||
secOffset = int(word)
|
secOffset = int(strNum)
|
||||||
used = 2
|
used = 2
|
||||||
isTime = False
|
isTime = False
|
||||||
hrAbs = -1
|
hrAbs = -1
|
||||||
minAbs = -1
|
minAbs = -1
|
||||||
elif int(word) > 100:
|
elif int(strNum) > 100:
|
||||||
strHH = int(word) / 100
|
strHH = int(strNum) / 100
|
||||||
strMM = int(word) - strHH * 100
|
strMM = int(strNum) - strHH * 100
|
||||||
military = True
|
military = True
|
||||||
if wordNext == "hours":
|
if wordNext == "hours" or wordNext == "hour" or \
|
||||||
|
remainder == "hours" or remainder == "hour":
|
||||||
used += 1
|
used += 1
|
||||||
elif wordNext[0].isdigit():
|
elif wordNext and wordNext[0].isdigit():
|
||||||
strHH = word
|
strHH = strNum
|
||||||
strMM = wordNext
|
strMM = wordNext
|
||||||
military = True
|
military = True
|
||||||
used += 1
|
used += 1
|
||||||
if wordNextNext == "hours":
|
if wordNext == "hours" or wordNext == "hour" or \
|
||||||
|
remainder == "hours" or remainder == "hour":
|
||||||
used += 1
|
used += 1
|
||||||
elif (
|
elif (
|
||||||
wordNext == "" or wordNext == "o'clock" or
|
wordNext == "" or wordNext == "o'clock" or
|
||||||
|
@ -753,7 +750,7 @@ def extract_datetime_en(string, currentDate=None):
|
||||||
wordNextNext == timeQualifier
|
wordNextNext == timeQualifier
|
||||||
)
|
)
|
||||||
)):
|
)):
|
||||||
strHH = word
|
strHH = strNum
|
||||||
strMM = 00
|
strMM = 00
|
||||||
if wordNext == "o'clock":
|
if wordNext == "o'clock":
|
||||||
used += 1
|
used += 1
|
||||||
|
@ -788,6 +785,12 @@ def extract_datetime_en(string, currentDate=None):
|
||||||
else:
|
else:
|
||||||
isTime = False
|
isTime = False
|
||||||
|
|
||||||
|
# keep current date
|
||||||
|
if not military and remainder not in ["pm", "am", "o'clock"]:
|
||||||
|
hrOffset = hrOffset + int(dateNow.strftime("%H"))
|
||||||
|
minOffset = minOffset + int(dateNow.strftime("%M"))
|
||||||
|
secOffset = secOffset + int(dateNow.strftime("%S"))
|
||||||
|
|
||||||
strHH = int(strHH) if strHH else 0
|
strHH = int(strHH) if strHH else 0
|
||||||
strMM = int(strMM) if strMM else 0
|
strMM = int(strMM) if strMM else 0
|
||||||
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
|
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
|
||||||
|
@ -802,6 +805,8 @@ def extract_datetime_en(string, currentDate=None):
|
||||||
if used > 0:
|
if used > 0:
|
||||||
# removed parsed words from the sentence
|
# removed parsed words from the sentence
|
||||||
for i in range(used):
|
for i in range(used):
|
||||||
|
if idx + i >= len(words):
|
||||||
|
break
|
||||||
words[idx + i] = ""
|
words[idx + i] = ""
|
||||||
|
|
||||||
if wordPrev == "o" or wordPrev == "oh":
|
if wordPrev == "o" or wordPrev == "oh":
|
||||||
|
|
|
@ -185,6 +185,39 @@ class TestNormalize(unittest.TestCase):
|
||||||
"2017-12-03 00:00:00", "")
|
"2017-12-03 00:00:00", "")
|
||||||
testExtract("lets meet at 8:00 tonight",
|
testExtract("lets meet at 8:00 tonight",
|
||||||
"2017-06-27 20:00:00", "lets meet")
|
"2017-06-27 20:00:00", "lets meet")
|
||||||
|
testExtract("lets meet at 5pm",
|
||||||
|
"2017-06-27 17:00:00", "lets meet")
|
||||||
|
|
||||||
|
def test_extract_relativedatetime_en(self):
|
||||||
|
def extractWithFormat(text):
|
||||||
|
date = datetime(2017, 6, 27, 10, 0)
|
||||||
|
[extractedDate, leftover] = extract_datetime(text, date)
|
||||||
|
extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
return [extractedDate, leftover]
|
||||||
|
|
||||||
|
def testExtract(text, expected_date, expected_leftover):
|
||||||
|
res = extractWithFormat(text)
|
||||||
|
self.assertEqual(res[0], expected_date)
|
||||||
|
self.assertEqual(res[1], expected_leftover)
|
||||||
|
|
||||||
|
testExtract("lets meet in 5 minutes",
|
||||||
|
"2017-06-27 10:05:00", "lets meet")
|
||||||
|
testExtract("lets meet in 5minutes",
|
||||||
|
"2017-06-27 10:05:00", "lets meet")
|
||||||
|
testExtract("lets meet in 5 seconds",
|
||||||
|
"2017-06-27 10:00:05", "lets meet")
|
||||||
|
testExtract("lets meet in 1 hour",
|
||||||
|
"2017-06-27 11:00:00", "lets meet")
|
||||||
|
testExtract("lets meet in 2 hours",
|
||||||
|
"2017-06-27 12:00:00", "lets meet")
|
||||||
|
testExtract("lets meet in 2hours",
|
||||||
|
"2017-06-27 12:00:00", "lets meet")
|
||||||
|
testExtract("lets meet in 1 minute",
|
||||||
|
"2017-06-27 10:01:00", "lets meet")
|
||||||
|
testExtract("lets meet in 1 second",
|
||||||
|
"2017-06-27 10:00:01", "lets meet")
|
||||||
|
testExtract("lets meet in 5seconds",
|
||||||
|
"2017-06-27 10:00:05", "lets meet")
|
||||||
|
|
||||||
def test_spaces(self):
|
def test_spaces(self):
|
||||||
self.assertEqual(normalize(" this is a test"),
|
self.assertEqual(normalize(" this is a test"),
|
||||||
|
|
Loading…
Reference in New Issue