Great patch from Ulf:

- The href target for a new window is "_new", not "new".

- Generating <div> sections within <p> sections is forbidden by the
  XHTML standard. Using just the right-aligned <div> should be
  sufficient and makes XHTML themes possible.
  (Proof at http://blog.rompe.org/ )

- While parsing the header of an RSS feed one should be aware that
  there may be more <title> tags in subsections and that POSIX regular
  expressions are always greedy. So make sure we don't match too much.
  (If you agree that using PCRE instead of the POSIX ones would
  generally be a good idea, then I am willing to make the patch, but for
  now I didn't want to mix POSIX and PCRE in one file.)
  (Proof at http://blog.rompe.org/index.php?q=import/feed/43 , try
  this feed without my patch)

- Some RSS 2.0 feeds don't have a per-item <link> section but have the
  permalink embedded in the <guid> section. This is not perfectly
  correct and the documentation mentions this possibility only in the
  examples, but since Dave Winer himself implements it this way it
  will happen more than once. So, if there is no link available and
  the guid looks like an address, then use that one.
  (Proof at http://blog.rompe.org/index.php?q=import/feed/22 , try
  this feed without my patch)

- Don't just write possibly updated feed header information into the
  database, but also use it immediately. Reuse the $feed array that is
  made for it.

- If a feed doesn't provide per-item titles, make sure not to produce
  defective markup by cutting off the remainder of an entity. Instead of
  just cutting off anything beyond the leading 30 characters of the
  cleaned description, it seems slicker to use up to 40 characters and
  split on word boundaries, but not on "&" or ";".
  (Proof also at http://blog.rompe.org/index.php?q=import/feed/22 .
  This feed will have title tags starting on February 1st, but I
  suspect there are many others out there without them.)

With this patch one could consider Drupal's aggregator RSS 2.0-ready.
4.2.x
Dries Buytaert 2003-01-07 19:09:42 +00:00
parent 337b80b1a6
commit 485e1c394a
3 changed files with 99 additions and 45 deletions

View File

@ -67,7 +67,7 @@ function import_format_item($item, $feed = 0) {
}
// external link
$output .= "<a href=\"$item->link\" target=\"new\">$item->title</a>";
$output .= "<a href=\"$item->link\" target=\"_new\">$item->title</a>";
return $output ."<br />";
}
@ -116,13 +116,13 @@ function import_block($op, $delta) {
$feed = db_fetch_object(db_query("SELECT * FROM feed WHERE fid = '%d'", $delta));
if ($feed) {
$block["subject"] = $feed->title;
$block["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
$block["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
}
else {
// it was a bundle. this is NOT elegant
$bundle = db_fetch_object(db_query("SELECT * FROM bundle WHERE bid = '%d'", $delta));
$block["subject"] = $bundle->title;
$block["content"] = import_bundle_block($bundle->attributes) ."<p><div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div></p>";
$block["content"] = import_bundle_block($bundle->attributes) ."<div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div>";
}
return $block;
@ -150,7 +150,7 @@ function import_get_feeds($attributes = 0) {
$result = db_query("SELECT * FROM feed ORDER BY fid");
while ($feed = db_fetch_object($result)) {
$block[$feed->fid]["subject"] = $feed->title;
$block[$feed->fid]["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
$block[$feed->fid]["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
$block[$feed->fid]["info"] = "$feed->title feed";
}
@ -200,18 +200,18 @@ function import_refresh($feed) {
$channel = ereg_replace("<item([^s].*)</item>", "", $data);
eregi("<title>(.*)</title>", $channel, $title);
eregi("<link>(.*)</link>", $channel, $link);
eregi("<description>(.*)</description>", $channel, $description);
eregi("<title>([^<]*)</title>", $channel, $title);
eregi("<link>([^<]*)</link>", $channel, $link);
eregi("<description>([^<]*)</description>", $channel, $description);
/*
** Strip invalid tags and provide default values (if required):
*/
$link = strip_tags($link[1]);
$description = filter(strtr($description[1], $tt));
$feed["link"] = strip_tags($link[1]);
$feed["description"] = filter(strtr($description[1], $tt));
db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $link, $description, $feed["fid"]);
db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $feed["link"], $feed["description"], $feed["fid"]);
/*
** Extract and process individual items:
@ -227,19 +227,37 @@ function import_refresh($feed) {
$t = eregi("<title>(.*)</title>", $item, $title);
$l = eregi("<link>(.*)</link>", $item, $link);
$g = eregi("<guid.*>(.*)</guid>", $item, $guid);
$a = eregi("<author>(.*)</author>", $item, $author);
$d = eregi("<description>(.*)</description>", $item, $description);
if ($t || $l || $a || $d) {
if ($t || $l || $g || $a || $d) {
/*
** Strip invalid tags and provide default values (if required):
*/
$title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt));
$link = strip_tags($link[1] ? $link[1] : $feed["link"]);
$author = strip_tags($author[1]);
$description = filter(strtr($description[1], $tt));
if ($title[1]) {
$title = strip_tags(strtr($title[1], $tt));
}
else {
/*
** Use up to 40 characters of the $description, ending at
** word boundary, but don't split potential entities.
*/
$title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", substr(strip_tags($description), 0, 40));
}
if ($link[1]) {
$link = strip_tags($link[1]);
}
elseif ($guid[1] && (strncmp($guid[1], "http://", 7) == 0)) {
$link = strip_tags($guid[1]);
}
else {
$link = $feed["link"];
}
$author = strip_tags($author[1]);
// print "<pre>title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."</pre><hr />";
@ -705,4 +723,4 @@ function import_page() {
}
}
?>
?>

View File

@ -67,7 +67,7 @@ function import_format_item($item, $feed = 0) {
}
// external link
$output .= "<a href=\"$item->link\" target=\"new\">$item->title</a>";
$output .= "<a href=\"$item->link\" target=\"_new\">$item->title</a>";
return $output ."<br />";
}
@ -116,13 +116,13 @@ function import_block($op, $delta) {
$feed = db_fetch_object(db_query("SELECT * FROM feed WHERE fid = '%d'", $delta));
if ($feed) {
$block["subject"] = $feed->title;
$block["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
$block["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
}
else {
// it was a bundle. this is NOT elegant
$bundle = db_fetch_object(db_query("SELECT * FROM bundle WHERE bid = '%d'", $delta));
$block["subject"] = $bundle->title;
$block["content"] = import_bundle_block($bundle->attributes) ."<p><div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div></p>";
$block["content"] = import_bundle_block($bundle->attributes) ."<div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div>";
}
return $block;
@ -150,7 +150,7 @@ function import_get_feeds($attributes = 0) {
$result = db_query("SELECT * FROM feed ORDER BY fid");
while ($feed = db_fetch_object($result)) {
$block[$feed->fid]["subject"] = $feed->title;
$block[$feed->fid]["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
$block[$feed->fid]["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
$block[$feed->fid]["info"] = "$feed->title feed";
}
@ -200,18 +200,18 @@ function import_refresh($feed) {
$channel = ereg_replace("<item([^s].*)</item>", "", $data);
eregi("<title>(.*)</title>", $channel, $title);
eregi("<link>(.*)</link>", $channel, $link);
eregi("<description>(.*)</description>", $channel, $description);
eregi("<title>([^<]*)</title>", $channel, $title);
eregi("<link>([^<]*)</link>", $channel, $link);
eregi("<description>([^<]*)</description>", $channel, $description);
/*
** Strip invalid tags and provide default values (if required):
*/
$link = strip_tags($link[1]);
$description = filter(strtr($description[1], $tt));
$feed["link"] = strip_tags($link[1]);
$feed["description"] = filter(strtr($description[1], $tt));
db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $link, $description, $feed["fid"]);
db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $feed["link"], $feed["description"], $feed["fid"]);
/*
** Extract and process individual items:
@ -227,19 +227,37 @@ function import_refresh($feed) {
$t = eregi("<title>(.*)</title>", $item, $title);
$l = eregi("<link>(.*)</link>", $item, $link);
$g = eregi("<guid.*>(.*)</guid>", $item, $guid);
$a = eregi("<author>(.*)</author>", $item, $author);
$d = eregi("<description>(.*)</description>", $item, $description);
if ($t || $l || $a || $d) {
if ($t || $l || $g || $a || $d) {
/*
** Strip invalid tags and provide default values (if required):
*/
$title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt));
$link = strip_tags($link[1] ? $link[1] : $feed["link"]);
$author = strip_tags($author[1]);
$description = filter(strtr($description[1], $tt));
if ($title[1]) {
$title = strip_tags(strtr($title[1], $tt));
}
else {
/*
** Use up to 40 characters of the $description, ending at
** word boundary, but don't split potential entities.
*/
$title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", substr(strip_tags($description), 0, 40));
}
if ($link[1]) {
$link = strip_tags($link[1]);
}
elseif ($guid[1] && (strncmp($guid[1], "http://", 7) == 0)) {
$link = strip_tags($guid[1]);
}
else {
$link = $feed["link"];
}
$author = strip_tags($author[1]);
// print "<pre>title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."</pre><hr />";
@ -705,4 +723,4 @@ function import_page() {
}
}
?>
?>

View File

@ -67,7 +67,7 @@ function import_format_item($item, $feed = 0) {
}
// external link
$output .= "<a href=\"$item->link\" target=\"new\">$item->title</a>";
$output .= "<a href=\"$item->link\" target=\"_new\">$item->title</a>";
return $output ."<br />";
}
@ -116,13 +116,13 @@ function import_block($op, $delta) {
$feed = db_fetch_object(db_query("SELECT * FROM feed WHERE fid = '%d'", $delta));
if ($feed) {
$block["subject"] = $feed->title;
$block["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
$block["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
}
else {
// it was a bundle. this is NOT elegant
$bundle = db_fetch_object(db_query("SELECT * FROM bundle WHERE bid = '%d'", $delta));
$block["subject"] = $bundle->title;
$block["content"] = import_bundle_block($bundle->attributes) ."<p><div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div></p>";
$block["content"] = import_bundle_block($bundle->attributes) ."<div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div>";
}
return $block;
@ -150,7 +150,7 @@ function import_get_feeds($attributes = 0) {
$result = db_query("SELECT * FROM feed ORDER BY fid");
while ($feed = db_fetch_object($result)) {
$block[$feed->fid]["subject"] = $feed->title;
$block[$feed->fid]["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
$block[$feed->fid]["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
$block[$feed->fid]["info"] = "$feed->title feed";
}
@ -200,18 +200,18 @@ function import_refresh($feed) {
$channel = ereg_replace("<item([^s].*)</item>", "", $data);
eregi("<title>(.*)</title>", $channel, $title);
eregi("<link>(.*)</link>", $channel, $link);
eregi("<description>(.*)</description>", $channel, $description);
eregi("<title>([^<]*)</title>", $channel, $title);
eregi("<link>([^<]*)</link>", $channel, $link);
eregi("<description>([^<]*)</description>", $channel, $description);
/*
** Strip invalid tags and provide default values (if required):
*/
$link = strip_tags($link[1]);
$description = filter(strtr($description[1], $tt));
$feed["link"] = strip_tags($link[1]);
$feed["description"] = filter(strtr($description[1], $tt));
db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $link, $description, $feed["fid"]);
db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $feed["link"], $feed["description"], $feed["fid"]);
/*
** Extract and process individual items:
@ -227,19 +227,37 @@ function import_refresh($feed) {
$t = eregi("<title>(.*)</title>", $item, $title);
$l = eregi("<link>(.*)</link>", $item, $link);
$g = eregi("<guid.*>(.*)</guid>", $item, $guid);
$a = eregi("<author>(.*)</author>", $item, $author);
$d = eregi("<description>(.*)</description>", $item, $description);
if ($t || $l || $a || $d) {
if ($t || $l || $g || $a || $d) {
/*
** Strip invalid tags and provide default values (if required):
*/
$title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt));
$link = strip_tags($link[1] ? $link[1] : $feed["link"]);
$author = strip_tags($author[1]);
$description = filter(strtr($description[1], $tt));
if ($title[1]) {
$title = strip_tags(strtr($title[1], $tt));
}
else {
/*
** Use up to 40 characters of the $description, ending at
** word boundary, but don't split potential entities.
*/
$title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", substr(strip_tags($description), 0, 40));
}
if ($link[1]) {
$link = strip_tags($link[1]);
}
elseif ($guid[1] && (strncmp($guid[1], "http://", 7) == 0)) {
$link = strip_tags($guid[1]);
}
else {
$link = $feed["link"];
}
$author = strip_tags($author[1]);
// print "<pre>title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."</pre><hr />";
@ -705,4 +723,4 @@ function import_page() {
}
}
?>
?>