Great patch from Ulf:

- The href target for a new window is "_new", not "new". - Generating <div> sections within <p> sections is forbidden by the XHTML standard. Using just the right-aligned <div> should be sufficient and makes XHTML themes possible. (Proof at http://blog.rompe.org/ ) - While parsing the header of an RSS feed one should be aware that there may be more <title> tags in subsections and that POSIX regular expressions are always greedy. So make sure we don't get too much. (If you agree that using PCRE instead of the POSIX ones would be generally a good idea, then I am willing to make the patch, but for now I didn't want to mix POSIX and PCRE in one file.) (Proof at http://blog.rompe.org/index.php?q=import/feed/43 , try this feed without my patch) - Some RSS 2.0 feeds don't have a per-item <link> section but have the permalink embedded in the <guid> section. This is not perfectly correct and the documentation mentions this possibility only in the examples, but since Dave Winer himself implements it this way it will happen more than once. So, if there is no link available and the guid looks like an address, then use that one. (Proof at http://blog.rompe.org/index.php?q=import/feed/22 , try this feed without my patch) - Don't only write possibly new feed header information into the database but also use it immediately. Reuse the $feed array that is made for it. - If a feed doesn't provide per-item titles, make sure not to produce defective markup by cutting off the remainder of an entity. Instead of just cutting off anything behind the leading 30 characters of the cleaned description, it seems slicker to use up to 40 characters and split on word boundaries, but not on "&" or ";". (Proof also at http://blog.rompe.org/index.php?q=import/feed/22 . This feed will have title tags starting on February 1st, but I suspect there are many others without them out there.) With this patch one could consider Drupal's aggregator RSS 2.0 ready.
author: Dries Buytaert <dries@buytaert.net> 2003-01-07 19:09:42 +0000
committer: Dries Buytaert <dries@buytaert.net> 2003-01-07 19:09:42 +0000
commit: 485e1c394a31165f208b8899a03ba731995ac20d (patch)
tree: cb44975c750ef05093d12034921c9d5185f0167c /modules/aggregator
parent: 337b80b1a6116863dd5a4c13ecf32cd3c147c763 (diff)
download: brdo-485e1c394a31165f208b8899a03ba731995ac20d.tar.gz
brdo-485e1c394a31165f208b8899a03ba731995ac20d.tar.bz2
1 files changed, 33 insertions, 15 deletions
diff --git a/modules/aggregator/aggregator.module b/modules/aggregator/aggregator.module
index 25d218c02..77c49bc0d 100644
--- a/modules/aggregator/aggregator.module
+++ b/modules/aggregator/aggregator.module
@@ -67,7 +67,7 @@ function import_format_item($item, $feed = 0) {
   }
 
   // external link
-  $output .= "<a href=\"$item->link\" target=\"new\">$item->title</a>";
+  $output .= "<a href=\"$item->link\" target=\"_new\">$item->title</a>";
 
   return $output ."<br />";
 }
@@ -116,13 +116,13 @@ function import_block($op, $delta) {
     $feed = db_fetch_object(db_query("SELECT * FROM feed WHERE fid = '%d'", $delta));
     if ($feed) {
       $block["subject"] = $feed->title;
-      $block["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
+      $block["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
     }
     else {
       // it was a bundle. this is NOT elegant
       $bundle = db_fetch_object(db_query("SELECT * FROM bundle WHERE bid = '%d'", $delta));
       $block["subject"] = $bundle->title;
-      $block["content"] = import_bundle_block($bundle->attributes) ."<p><div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div></p>";
+      $block["content"] = import_bundle_block($bundle->attributes) ."<div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div>";
     }
 
     return $block;
@@ -150,7 +150,7 @@ function import_get_feeds($attributes = 0) {
   $result = db_query("SELECT * FROM feed ORDER BY fid");
   while ($feed = db_fetch_object($result)) {
     $block[$feed->fid]["subject"] = $feed->title;
-    $block[$feed->fid]["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>";
+    $block[$feed->fid]["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>";
     $block[$feed->fid]["info"] = "$feed->title feed";
   }
 
@@ -200,18 +200,18 @@ function import_refresh($feed) {
 
     $channel = ereg_replace("<item([^s].*)</item>", "", $data);
 
-    eregi("<title>(.*)</title>", $channel, $title);
-    eregi("<link>(.*)</link>", $channel, $link);
-    eregi("<description>(.*)</description>", $channel, $description);
+    eregi("<title>([^<]*)</title>", $channel, $title);
+    eregi("<link>([^<]*)</link>", $channel, $link);
+    eregi("<description>([^<]*)</description>", $channel, $description);
 
     /*
     ** Strip invalid tags and provide default values (if required):
     */
 
-    $link = strip_tags($link[1]);
-    $description = filter(strtr($description[1], $tt));
+    $feed["link"] = strip_tags($link[1]);
+    $feed["description"] = filter(strtr($description[1], $tt));
 
-    db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $link, $description, $feed["fid"]);
+    db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $feed["link"], $feed["description"], $feed["fid"]);
 
     /*
     ** Extract and process individual items:
@@ -227,19 +227,37 @@ function import_refresh($feed) {
 
       $t = eregi("<title>(.*)</title>", $item, $title);
       $l = eregi("<link>(.*)</link>", $item, $link);
+      $g = eregi("<guid.*>(.*)</guid>", $item, $guid);
       $a = eregi("<author>(.*)</author>", $item, $author);
       $d = eregi("<description>(.*)</description>", $item, $description);
 
-      if ($t || $l || $a || $d) {
+      if ($t || $l || $g || $a || $d) {
 
         /*
         ** Strip invalid tags and provide default values (if required):
         */
 
-        $title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt));
-        $link = strip_tags($link[1] ? $link[1] : $feed["link"]);
-        $author = strip_tags($author[1]);
         $description = filter(strtr($description[1], $tt));
+  if ($title[1]) {
+    $title = strip_tags(strtr($title[1], $tt));
+  }
+  else {
+    /*
+          ** Use up to 40 characters of the $description, ending at
+          ** word boundary, but don't split potential entities.
+          */
+    $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", substr(strip_tags($description), 0, 40));
+  }
+  if ($link[1]) {
+    $link = strip_tags($link[1]);
+  }
+  elseif ($guid[1] && (strncmp($guid[1], "http://", 7) == 0)) {
+    $link = strip_tags($guid[1]);
+  }
+  else {
+    $link = $feed["link"];
+  }
+        $author = strip_tags($author[1]);
 
         // print "<pre>title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."</pre><hr />";
 
@@ -705,4 +723,4 @@ function import_page() {
   }
 }
 
-?>
-\ No newline at end of file
+?>
author	Dries Buytaert <dries@buytaert.net>	2003-01-07 19:09:42 +0000
committer	Dries Buytaert <dries@buytaert.net>	2003-01-07 19:09:42 +0000
commit	485e1c394a31165f208b8899a03ba731995ac20d (patch)
tree	cb44975c750ef05093d12034921c9d5185f0167c /modules/aggregator
parent	337b80b1a6116863dd5a4c13ecf32cd3c147c763 (diff)
download	brdo-485e1c394a31165f208b8899a03ba731995ac20d.tar.gz brdo-485e1c394a31165f208b8899a03ba731995ac20d.tar.bz2