Fix feedreader date comparison to allow RSS entries with identical timestamps (#104925)

Change feedreader publish-date comparison
pull/106311/head
Matt 2023-12-23 07:48:36 -08:00 committed by GitHub
parent e311a6835e
commit 345f7f2003
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 101 additions and 6 deletions

View File

@ -197,35 +197,40 @@ class FeedManager:
) )
entry.update({"feed_url": self._url}) entry.update({"feed_url": self._url})
self._hass.bus.fire(self._event_type, entry) self._hass.bus.fire(self._event_type, entry)
_LOGGER.debug("New event fired for entry %s", entry.get("link"))
def _publish_new_entries(self) -> None:
    """Publish new entries to the event bus.

    An entry is published when this is the first run, or when its
    ``published_parsed`` or ``updated_parsed`` value is strictly newer than
    the timestamp stored for this feed. If nothing is published,
    ``self._log_no_entries()`` is called instead.
    """
    assert self._feed is not None
    new_entry_count = 0
    self._last_entry_timestamp = self._storage.get_timestamp(self._feed_id)
    if self._last_entry_timestamp:
        self._firstrun = False
    else:
        # Set last entry timestamp as epoch time if not available
        self._last_entry_timestamp = dt_util.utc_from_timestamp(0).timetuple()
    # Compare every entry against a snapshot taken before the loop, so that
    # entries published at identical times can all be processed.
    # NOTE(review): presumably _update_and_fire_entry advances
    # self._last_entry_timestamp while the loop runs -- confirm.
    last_entry_timestamp = self._last_entry_timestamp
    for entry in self._feed.entries:
        if (
            self._firstrun
            or (
                "published_parsed" in entry
                and entry.published_parsed > last_entry_timestamp
            )
            or (
                "updated_parsed" in entry
                and entry.updated_parsed > last_entry_timestamp
            )
        ):
            self._update_and_fire_entry(entry)
            new_entry_count += 1
        else:
            _LOGGER.debug("Already processed entry %s", entry.get("link"))
    if new_entry_count == 0:
        self._log_no_entries()
    else:
        _LOGGER.debug("%d entries published in feed %s", new_entry_count, self._url)
    self._firstrun = False

View File

@ -68,6 +68,12 @@ def fixture_feed_atom_event(hass: HomeAssistant) -> bytes:
return load_fixture_bytes("feedreader5.xml") return load_fixture_bytes("feedreader5.xml")
@pytest.fixture(name="feed_identically_timed_events")
def fixture_feed_identically_timed_events(hass: HomeAssistant) -> bytes:
    """Return the raw test feed whose two events share one publish time."""
    fixture_name = "feedreader6.xml"
    return load_fixture_bytes(fixture_name)
@pytest.fixture(name="events") @pytest.fixture(name="events")
async def fixture_events(hass: HomeAssistant) -> list[Event]: async def fixture_events(hass: HomeAssistant) -> list[Event]:
"""Fixture that catches alexa events.""" """Fixture that catches alexa events."""
@ -285,6 +291,63 @@ async def test_atom_feed(hass: HomeAssistant, events, feed_atom_event) -> None:
assert events[0].data.updated_parsed.tm_min == 30 assert events[0].data.updated_parsed.tm_min == 30
async def test_feed_identical_timestamps(
    hass: HomeAssistant, events, feed_identically_timed_events
) -> None:
    """Check that two feed entries with identical timestamps both fire events."""
    # Value returned by the patched StoredData.get_timestamp: the Unix epoch.
    stored_timestamp = gmtime(
        datetime.fromisoformat("1970-01-01T00:00:00.0+0000").timestamp()
    )
    with patch(
        "feedparser.http.get",
        return_value=feed_identically_timed_events,
    ), patch(
        "homeassistant.components.feedreader.StoredData.get_timestamp",
        return_value=stored_timestamp,
    ):
        setup_ok = await async_setup_component(hass, feedreader.DOMAIN, VALID_CONFIG_2)
        assert setup_ok

        hass.bus.async_fire(EVENT_HOMEASSISTANT_START)
        await hass.async_block_till_done()

    assert len(events) == 2

    # Per-entry identity: (title, link, id), in the order the events arrived.
    expected_entries = (
        ("Title 1", "http://www.example.com/link/1", "GUID 1"),
        ("Title 2", "http://www.example.com/link/2", "GUID 2"),
    )
    for event, (title, link, guid) in zip(events, expected_entries):
        assert event.data.title == title
        assert event.data.link == link
        assert event.data.id == guid

    # Both entries must report exactly the same timestamp fields.
    expected_fields = {
        "tm_year": 2018,
        "tm_mon": 4,
        "tm_mday": 30,
        "tm_hour": 15,
        "tm_min": 10,
        "tm_sec": 0,
    }
    for event in events:
        for field_name, expected_value in expected_fields.items():
            assert getattr(event.data.updated_parsed, field_name) == expected_value
async def test_feed_updates( async def test_feed_updates(
hass: HomeAssistant, events, feed_one_event, feed_two_event hass: HomeAssistant, events, feed_one_event, feed_two_event
) -> None: ) -> None:

27
tests/fixtures/feedreader6.xml vendored Normal file
View File

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Test fixture: an RSS 2.0 feed with two items that carry the same pubDate
(Mon, 30 Apr 2018 15:10:00 +0000) but different titles, links and GUIDs.
-->
<rss version="2.0">
<channel>
<title>RSS Sample</title>
<description>This is an example of an RSS feed</description>
<link>http://www.example.com/main.html</link>
<lastBuildDate>Mon, 30 Apr 2018 12:00:00 +0000 </lastBuildDate>
<pubDate>Mon, 30 Apr 2018 15:00:00 +0000</pubDate>
<ttl>1800</ttl>
<item>
<title>Title 1</title>
<description>Description 1</description>
<link>http://www.example.com/link/1</link>
<guid isPermaLink="false">GUID 1</guid>
<pubDate>Mon, 30 Apr 2018 15:10:00 +0000</pubDate>
</item>
<item>
<title>Title 2</title>
<description>Description 2</description>
<link>http://www.example.com/link/2</link>
<guid isPermaLink="false">GUID 2</guid>
<pubDate>Mon, 30 Apr 2018 15:10:00 +0000</pubDate>
</item>
</channel>
</rss>