diff options
author | Dries Buytaert <dries@buytaert.net> | 2001-08-04 13:37:41 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2001-08-04 13:37:41 +0000 |
commit | 97fe958df9e66be1c08add741950d1b80267fc31 (patch) | |
tree | 0a3888c18b9ebfab4b327698842f312ab6c04819 /modules/aggregator | |
parent | 6e15c4b55fb96ed0aa2b61f0a50b23cf0babc56f (diff) | |
download | brdo-97fe958df9e66be1c08add741950d1b80267fc31.tar.gz brdo-97fe958df9e66be1c08add741950d1b80267fc31.tar.bz2 |
- various improvements and small additions
Diffstat (limited to 'modules/aggregator')
-rw-r--r-- | modules/aggregator/aggregator.module | 226 |
1 files changed, 145 insertions, 81 deletions
diff --git a/modules/aggregator/aggregator.module b/modules/aggregator/aggregator.module index 807ed9098..586a7d3c4 100644 --- a/modules/aggregator/aggregator.module +++ b/modules/aggregator/aggregator.module @@ -139,86 +139,79 @@ function import_refresh($feed) { ** Grab the headlines: */ - $url = parse_url($feed[url]); - $fp = fsockopen($url[host], ($url[port] ? $url[port] : 80), $errno, $errstr, 15); - - if ($fp) { + if ($fp = fopen($feed[url], "r")) { // fetch data: - fputs($fp, "GET $url[path]?$url[query] HTTP/1.0\r\nUser-Agent: ". variable_get("site_name", "drupal") ."\r\nHost: $url[host]\r\nAccept: */*\r\n\r\n"); - - while (!feof($fp)) $data .= fgets($fp, 128); + while (!feof($fp)) { + $data .= fgets($fp, 128); + } // initialize the translation table: $tt = array_flip(get_html_translation_table(HTML_ENTITIES)); $tt["'"] = "'"; - if (strstr($data, "200 OK")) { - /* - ** Remove expired items: - */ - - db_query("DELETE FROM item WHERE fid = '$feed[fid]' AND timestamp < ". (time() - $feed[uncache])); + /* + ** Remove expired items: + */ + db_query("DELETE FROM item WHERE fid = '$feed[fid]' AND timestamp < ". (time() - $feed[uncache])); - /* - ** Remove unsupported tags or sub-elements: - */ + /* + ** Remove unsupported tags or sub-elements: + */ - $data = ereg_replace("<textinput([^s].*)</textinput>", "", $data); - $data = ereg_replace("<image([^s].*)</image>", "", $data); + $data = ereg_replace("<textinput([^s].*)</textinput>", "", $data); + $data = ereg_replace("<image([^s].*)</image>", "", $data); - /* - ** Extract and process channel information: - */ + /* + ** Extract and process channel information: + */ - $channel = ereg_replace("<item([^s].*)</item>", "", $data); + $channel = ereg_replace("<item([^s].*)</item>", "", $data); - eregi("<title>(.*)</title>", $channel, $title); - eregi("<link>(.*)</link>", $channel, $link); - eregi("<description>(.*)</description>", $channel, $description); + eregi("<title>(.*)</title>", $channel, $title); + eregi("<link>(.*)</link>", $channel, $link); + eregi("<description>(.*)</description>", $channel, $description); - db_query("UPDATE feed SET timestamp = '". time() ."', link = '". check_input($link[1]) ."', description = '". check_input($description[1]) ."' WHERE fid = '". $feed[fid] ."'"); + db_query("UPDATE feed SET timestamp = '". time() ."', link = '". check_input($link[1]) ."', description = '". check_input($description[1]) ."' WHERE fid = '". $feed[fid] ."'"); - /* - ** Extract and process individual items: - */ + /* + ** Extract and process individual items: + */ - eregi("<item([^s].*)</item>", $data, $data); + eregi("<item([^s].*)</item>", $data, $data); + // print "<pre>". htmlentities($data[0]) ."</pre>"; - // print "<pre>". htmlentities($data[0]) ."</pre>"; + $items = array_reverse(explode("</item>", $data[0])); - $items = array_reverse(explode("</item>", $data[0])); + foreach ($items as $item) { + unset($title, $link, $author, $description); - foreach ($items as $item) { - unset($title, $link, $author, $description); + $t = eregi("<title>(.*)</title>", $item, $title); + $l = eregi("<link>(.*)</link>", $item, $link); + $a = eregi("<author>(.*)</author>", $item, $author); + $d = eregi("<description>(.*)</description>", $item, $description); - $t = eregi("<title>(.*)</title>", $item, $title); - $l = eregi("<link>(.*)</link>", $item, $link); - $a = eregi("<author>(.*)</author>", $item, $author); - $d = eregi("<description>(.*)</description>", $item, $description); + if ($t || $l || $a || $d) { - if ($t || $l || $a || $d) { + /* + ** Strip invalid tags and provide default values (if required): + */ - /* - ** Strip invalid tags and provide default values (if required): - */ + $title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt)); + $link = $link[1] ? $link[1] : $feed[link]; + $description = strtr($description[1], $tt); - $title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt)); - $link = $link[1] ? $link[1] : $feed[link]; - $description = strtr($description[1], $tt); + // print "<pre>title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."</pre><hr />"; - print "<pre>title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."</pre><hr />"; + /* + ** Save this item: + */ - /* - ** Save this item: - */ - - import_save_item(array(fid => $feed[fid], title => $title, link => $link, author => $author[1], description => $description, attributes => $feed[attributes])); - } + import_save_item(array(fid => $feed[fid], title => $title, link => $link, author => $author[1], description => $description, attributes => $feed[attributes])); } } - else { - watchdog("warning", "failed to syndicate from '$feed[title]'". ($errstr ? ": $errstr" : "")); - } + } + else { + watchdog("warning", "failed to syndicate from '$feed[title]'". ($errstr ? ": $errstr" : "")); } return "feed '$feed[title]' updated."; @@ -273,8 +266,8 @@ function import_form_feed($edit = array()) { $period = array(900 => format_interval(900), 1800 => format_interval(1800), 3600 => format_interval(3600), 7200 => format_interval(7200), 10800 => format_interval(10800), 21600 => format_interval(21600), 32400 => format_interval(32400), 43200 => format_interval(43200), 64800 => format_interval(64800), 86400 => format_interval(86400), 172800 => format_interval(172800), 259200 => format_interval(259200), 604800 => format_interval(604800), 1209600 => format_interval(1209600), 2419200 => format_interval(2419200)); - if ($edit[refresh]== "") $edit[refresh] = 3600; - if ($edit[uncache]== "") $edit[uncache] = 2419200; + if ($edit[refresh] == "") $edit[refresh] = 3600; + if ($edit[uncache] == "") $edit[uncache] = 2419200; $form .= form_textfield("Title", "title", $edit[title], 50, 64, "The name of the feed; typically the name of the website you syndicate content from."); $form .= form_textfield("Url", "url", $edit[url], 50, 128, "The fully-qualified URL of the feed."); @@ -345,23 +338,68 @@ function import_view() { return $output; } -function import_view_feed() { - $result = db_query("SELECT * FROM feed ORDER BY title"); - while ($feed = db_fetch_object($result)) { - $output .= "<b>$feed->title</b>". ($feed->link ? " (". format_url($feed->link) .")" : "") ."<ul>". check_output($feed->description) ."</ul>"; +function import_fd_form() { + global $REQUEST_URI; + + $form .= form_textfield("Feed directory file", "url", "http://127.0.0.1/1.fd", 64, 128, "The fully-qualified URL of the feed directory file."); + $form .= form_submit("Collect feeds"); + + return form($REQUEST_URI, $form); +} + +function import_fd_collect($edit) { + global $REQUEST_URI; + + set_time_limit(180); + + if ($fp = fopen($edit[url], "r")) { + // fetch data: + while (!feof($fp)) { + $data .= fgets($fp, 128); + } + + // initialize the translation table: + $tt = array_flip(get_html_translation_table(HTML_ENTITIES)); + $tt["'"] = "'"; + + $items = explode("</channel>", $data); + + $output .= "<form action=\"$REQUEST_URI\" method=\"POST\">\n"; + + foreach ($items as $item) { + unset ($link, $title); + + // print "<pre>item = ". htmlentities($item) ."\n\n</pre>"; + + eregi("<link>(.*)</link>", $item, $link); + eregi("<title>(.*)</title>", $item, $title); + + $link = strip_tags(strtr($link[1], $tt)); + $title = strip_tags(strtr($title[1], $tt)); + + // print "<b>title = $title, link = $link<br /></b>"; + if ($link && $link && !db_fetch_array(db_query("SELECT * FROM feed WHERE url = '". check_input($link) ."'"))) { + $output .= "<input type=\"checkbox\" name=\"edit[$title]\" value=\"$link\"> ". strtr($title, $tt) ."<br />"; + } + } + + $output .= "<input type=\"submit\" name=\"op\" value=\"Import feeds\" />\n"; + $output .= "</form>"; + + return $output; + } + else { + print status("failed to open '$edit[url]': $errstr."); } - return $output; } -function import_view_bundle() { - $result = db_query("SELECT * FROM bundle ORDER BY title"); - while ($bundle = db_fetch_object($result)) { - $output .= "<b>$bundle->title</b><ul>". import_bundle_block($bundle->attributes) ."</ul>"; +function import_fd_import($edit) { + foreach ($edit as $title => $link) { + import_save_feed(array("title" => $title, "url" => $link, "refresh" => 3600, "uncache" => 2419200)); } - return $output; } -function import_view_item() { +function import_tag() { global $REQUEST_URI; $result = db_query("SELECT i.*, f.title AS feed FROM item i LEFT JOIN feed f ON i.fid = f.fid ORDER BY i.iid DESC LIMIT 50"); @@ -384,7 +422,7 @@ function import_admin() { if (user_access("administer news feeds")) { - print "<small><a href=\"admin.php?mod=import&type=feed&op=add\">add new feed</a> | <a href=\"admin.php?mod=import&type=bundle&op=add\">add new bundle</a> | <a href=\"admin.php?mod=import&type=feed&op=view\">available feeds</a> | <a href=\"admin.php?mod=import&type=bundle&op=view\">available bundles</a> | <a href=\"admin.php?mod=import&type=item&op=view\">available items</a> | <a href=\"admin.php?mod=import&op=view\">overview</a> | <a href=\"admin.php?mod=import&op=help\">help</a></small><hr />"; + print "<small><a href=\"admin.php?mod=import&type=feed&op=add\">add new feed</a> | <a href=\"admin.php?mod=import&type=bundle&op=add\">add new bundle</a> | <a href=\"admin.php?mod=import&op=fd\">import feeds</a> | <a href=\"admin.php?mod=import&op=tag\">tag items</a> | <a href=\"admin.php?mod=import&op=view\">overview</a> | <a href=\"admin.php?mod=import&op=help\">help</a></small><hr />"; switch($op) { case "help": @@ -402,6 +440,16 @@ function import_admin() { else print import_form_feed(import_get_feed($id)); break; + case "fd": + print import_fd_form(); + break; + case "Collect feeds": + print import_fd_collect($edit); + break; + case "Import feeds": + print import_fd_import($edit); + print import_view(); + break; case "remove": print status(import_remove(import_get_feed($id))); print import_view(); @@ -410,9 +458,12 @@ function import_admin() { print status(import_refresh(import_get_feed($id))); print import_view(); break; + case "tag": + print import_tag(); + break; case "Save attributes": print status(import_save_attributes($edit)); - print import_view_item(); + print import_tag(); break; case "Delete": $edit[title] = 0; @@ -426,14 +477,7 @@ function import_admin() { print import_view(); break; default: - if ($type == "bundle") - print import_view_bundle(); - else if ($type == "feed") - print import_view_feed(); - else if ($type == "item") - print import_view_item(); - else - print import_view(); + print import_view(); } } else { @@ -486,7 +530,7 @@ function import_page_feed($fid) { $header .= "<p><b>". t("Website") .":</b><div style=\"margin-left: 20px;\">". format_url($feed->link) ."</div></p>"; $header .= "<p><b>". t("Description") .":</b><div style=\"margin-left: 20px;\">". check_output($feed->description) ."</div></p>"; - $header .= "<p><b>". t("Last update") .":</b><div style=\"margin-left: 20px;\">". format_interval(time() - $feed->timestamp) ." ". t("ago") ."</div></p>"; + $header .= "<p><b>". t("Last update") .":</b><div style=\"margin-left: 20px;\">". format_interval(time() - $feed->timestamp) ." ". t("ago") ."<a href=\"$feed->url\"><img src=\"". $theme->image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" /></a><br /><br /></div></p>\n"; $result = db_query("SELECT * FROM item WHERE fid = '". check_input($fid) ."' ORDER BY iid DESC LIMIT ". variable_get("import_page_limit", 75)); @@ -505,7 +549,6 @@ function import_page_feed($fid) { unset($links); } $output .= "</table>\n"; - $output .= "<a href=\"$feed->url\"><img src=\"". $theme->image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" /></a>\n"; $theme->header(); $theme->box(check_output($feed->title), $header); @@ -555,17 +598,35 @@ function import_page_sources() { $result = db_query("SELECT * FROM feed ORDER BY title"); while ($feed = db_fetch_object($result)) { - $output .= "<a href=\"$feed->url\"><img src=\"". $theme->image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" /></a>\n"; $output .= format_url("module.php?mod=import&op=feed&id=$feed->fid", $feed->title); $output .= "<p><div style=\"margin-left: 20px;\">". check_output($feed->description) ."</div></p>"; } + $output .= "<a href=\"module.php?mod=import&op=fd\"><img src=\"". $theme->image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" /></a><br />\n"; + $theme->header(); $theme->box(t("News feeds"), import_page_info()); $theme->box(t("News sources"), $output); $theme->footer(); } +function import_page_fd() { + + $result = db_query("SELECT * FROM feed ORDER BY title"); + + $output .= "<?xml version=\"1.0\"?>\n\n"; + + while ($feed = db_fetch_object($result)) { + $output .= "<channel>\n"; + $output .= " <title>". htmlentities($feed->title) ."</title>\n"; + $output .= " <link>". htmlentities($feed->url) ."</link>\n"; + $output .= "</channel>\n\n"; + } + + + print $output; +} + function import_page_bundles() { import_page_blocks(import_get_bundles()); } @@ -622,6 +683,9 @@ function import_page() { case "sources": import_page_sources(); break; + case "fd": + import_page_fd(); + break; default: import_page_last(); } |