Fix `feedreader` component to keep the last entry timestamp up to date (#77547)

Fix feedreader to keep the last entry timestamp up to date

- Give the `updated` date precedence over the `published` date when updating
  `last_entry_timestamp`, for feed entries that carry both dates.
pull/77541/head
likeablob 2022-08-31 19:43:50 +09:00 committed by GitHub
parent 008ac8d10d
commit ee6ffb1be4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 12 additions and 11 deletions

View File

@ -156,26 +156,27 @@ class FeedManager:
def _update_and_fire_entry(self, entry: feedparser.FeedParserDict) -> None: def _update_and_fire_entry(self, entry: feedparser.FeedParserDict) -> None:
"""Update last_entry_timestamp and fire entry.""" """Update last_entry_timestamp and fire entry."""
# Check if the entry has a published or updated date. # Check if the entry has a updated or published date.
if "published_parsed" in entry and entry.published_parsed: # Start from a updated date because generally `updated` > `published`.
# We are lucky, `published_parsed` data available, let's make use of if "updated_parsed" in entry and entry.updated_parsed:
# it to publish only new available entries since the last run
self._has_published_parsed = True
self._last_entry_timestamp = max(
entry.published_parsed, self._last_entry_timestamp
)
elif "updated_parsed" in entry and entry.updated_parsed:
# We are lucky, `updated_parsed` data available, let's make use of # We are lucky, `updated_parsed` data available, let's make use of
# it to publish only new available entries since the last run # it to publish only new available entries since the last run
self._has_updated_parsed = True self._has_updated_parsed = True
self._last_entry_timestamp = max( self._last_entry_timestamp = max(
entry.updated_parsed, self._last_entry_timestamp entry.updated_parsed, self._last_entry_timestamp
) )
elif "published_parsed" in entry and entry.published_parsed:
# We are lucky, `published_parsed` data available, let's make use of
# it to publish only new available entries since the last run
self._has_published_parsed = True
self._last_entry_timestamp = max(
entry.published_parsed, self._last_entry_timestamp
)
else: else:
self._has_published_parsed = False
self._has_updated_parsed = False self._has_updated_parsed = False
self._has_published_parsed = False
_LOGGER.debug( _LOGGER.debug(
"No published_parsed or updated_parsed info available for entry %s", "No updated_parsed or published_parsed info available for entry %s",
entry, entry,
) )
entry.update({"feed_url": self._url}) entry.update({"feed_url": self._url})