diff options
author | Dries Buytaert <dries@buytaert.net> | 2003-01-07 19:09:42 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2003-01-07 19:09:42 +0000 |
commit | 485e1c394a31165f208b8899a03ba731995ac20d (patch) | |
tree | cb44975c750ef05093d12034921c9d5185f0167c /modules/aggregator | |
parent | 337b80b1a6116863dd5a4c13ecf32cd3c147c763 (diff) | |
download | brdo-485e1c394a31165f208b8899a03ba731995ac20d.tar.gz brdo-485e1c394a31165f208b8899a03ba731995ac20d.tar.bz2 |
Great patch from Ulf:
- The href target for a new window is "_new", not "new".
- Generating <div> sections within <p> sections is forbidden by the
XHTML standard. Using just the right aligned <div> should be
sufficient and makes XHTML themes possible.
(Proof at http://blog.rompe.org/ )
- While parsing the header of an RSS feed one should be aware that
there may be more <title> tags in subsections and that POSIX regular
expressions are always greedy. So make sure we don't match too much.
(If you agree that using PCRE instead of the POSIX ones would be
generally a good idea, then I am willing to make the patch, but for
now I didn't want to mix POSIX and PCRE in one file.)
(Proof at http://blog.rompe.org/index.php?q=import/feed/43 , try
this feed without my patch)
- Some RSS 2.0 feeds don't have a per item <link> section but have the
permalink embedded in the <guid> section. This is not perfectly
correct and the documentation mentions this possibility only in the
examples, but since Dave Winer himself implements it this way it
will happen more than once. So, if there is no link available and
the guid looks like an address, then use that one.
(Proof at http://blog.rompe.org/index.php?q=import/feed/22 , try
this feed without my patch)
- Don't only write any new feed header information into the
database, but also use it immediately. Reuse the $feed array that is
made for it.
- If a feed doesn't provide per item titles, make sure not to produce
defective markup by cutting the remainder of an entity. Instead of
just cutting off anything beyond the leading 30 characters of the
cleaned description, it seems slicker to use up to 40 characters and
split on word boundaries, but not on "&" or ";".
(Proof also at http://blog.rompe.org/index.php?q=import/feed/22 .
This feed will have title tags starting on February 1st, but I
suspect there are many others without them out there.)
With this patch one could consider Drupal's aggregator RSS 2.0 ready.
Diffstat (limited to 'modules/aggregator')
-rw-r--r-- | modules/aggregator/aggregator.module | 48 |
1 files changed, 33 insertions, 15 deletions
diff --git a/modules/aggregator/aggregator.module b/modules/aggregator/aggregator.module index 25d218c02..77c49bc0d 100644 --- a/modules/aggregator/aggregator.module +++ b/modules/aggregator/aggregator.module @@ -67,7 +67,7 @@ function import_format_item($item, $feed = 0) { } // external link - $output .= "<a href=\"$item->link\" target=\"new\">$item->title</a>"; + $output .= "<a href=\"$item->link\" target=\"_new\">$item->title</a>"; return $output ."<br />"; } @@ -116,13 +116,13 @@ function import_block($op, $delta) { $feed = db_fetch_object(db_query("SELECT * FROM feed WHERE fid = '%d'", $delta)); if ($feed) { $block["subject"] = $feed->title; - $block["content"] = import_feed_block($feed) ."<p><div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>"; + $block["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>"; } else { // it was a bundle. this is NOT elegant $bundle = db_fetch_object(db_query("SELECT * FROM bundle WHERE bid = '%d'", $delta)); $block["subject"] = $bundle->title; - $block["content"] = import_bundle_block($bundle->attributes) ."<p><div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div></p>"; + $block["content"] = import_bundle_block($bundle->attributes) ."<div align=\"right\">". l(t("more"), "import/bundle/$bundle->bid", array("title" => t("View this bundle's recent news."))) ."</div>"; } return $block; @@ -150,7 +150,7 @@ function import_get_feeds($attributes = 0) { $result = db_query("SELECT * FROM feed ORDER BY fid"); while ($feed = db_fetch_object($result)) { $block[$feed->fid]["subject"] = $feed->title; - $block[$feed->fid]["content"] = import_feed_block($feed) ."<p><div align=\"right\">". 
l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div></p>"; + $block[$feed->fid]["content"] = import_feed_block($feed) ."<div align=\"right\">". l(t("more"), "import/feed/$feed->fid", array("title" => t("View this feed's recent news."))) ."</div>"; $block[$feed->fid]["info"] = "$feed->title feed"; } @@ -200,18 +200,18 @@ function import_refresh($feed) { $channel = ereg_replace("<item([^s].*)</item>", "", $data); - eregi("<title>(.*)</title>", $channel, $title); - eregi("<link>(.*)</link>", $channel, $link); - eregi("<description>(.*)</description>", $channel, $description); + eregi("<title>([^<]*)</title>", $channel, $title); + eregi("<link>([^<]*)</link>", $channel, $link); + eregi("<description>([^<]*)</description>", $channel, $description); /* ** Strip invalid tags and provide default values (if required): */ - $link = strip_tags($link[1]); - $description = filter(strtr($description[1], $tt)); + $feed["link"] = strip_tags($link[1]); + $feed["description"] = filter(strtr($description[1], $tt)); - db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $link, $description, $feed["fid"]); + db_query("UPDATE feed SET timestamp = '%s', link = '%s', description = '%s' WHERE fid = '%s'", time(), $feed["link"], $feed["description"], $feed["fid"]); /* ** Extract and process individual items: @@ -227,19 +227,37 @@ function import_refresh($feed) { $t = eregi("<title>(.*)</title>", $item, $title); $l = eregi("<link>(.*)</link>", $item, $link); + $g = eregi("<guid.*>(.*)</guid>", $item, $guid); $a = eregi("<author>(.*)</author>", $item, $author); $d = eregi("<description>(.*)</description>", $item, $description); - if ($t || $l || $a || $d) { + if ($t || $l || $g || $a || $d) { /* ** Strip invalid tags and provide default values (if required): */ - $title = strip_tags(strtr($title[1] ? 
$title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt)); - $link = strip_tags($link[1] ? $link[1] : $feed["link"]); - $author = strip_tags($author[1]); $description = filter(strtr($description[1], $tt)); + if ($title[1]) { + $title = strip_tags(strtr($title[1], $tt)); + } + else { + /* + ** Use up to 40 characters of the $description, ending at + ** word boundary, but don't split potential entities. + */ + $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", substr(strip_tags($description), 0, 40)); + } + if ($link[1]) { + $link = strip_tags($link[1]); + } + elseif ($guid[1] && (strncmp($guid[1], "http://", 7) == 0)) { + $link = strip_tags($guid[1]); + } + else { + $link = $feed["link"]; + } + $author = strip_tags($author[1]); // print "<pre>title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."</pre><hr />"; @@ -705,4 +723,4 @@ function import_page() { } } -?>
\ No newline at end of file +?> |