From 97fe958df9e66be1c08add741950d1b80267fc31 Mon Sep 17 00:00:00 2001 From: Dries Buytaert Date: Sat, 4 Aug 2001 13:37:41 +0000 Subject: - various improvements and small additions --- modules/aggregator.module | 226 +++++++++++++++++++++++++++++----------------- 1 file changed, 145 insertions(+), 81 deletions(-) (limited to 'modules/aggregator.module') diff --git a/modules/aggregator.module b/modules/aggregator.module index 807ed9098..586a7d3c4 100644 --- a/modules/aggregator.module +++ b/modules/aggregator.module @@ -139,86 +139,79 @@ function import_refresh($feed) { ** Grab the headlines: */ - $url = parse_url($feed[url]); - $fp = fsockopen($url[host], ($url[port] ? $url[port] : 80), $errno, $errstr, 15); - - if ($fp) { + if ($fp = fopen($feed[url], "r")) { // fetch data: - fputs($fp, "GET $url[path]?$url[query] HTTP/1.0\r\nUser-Agent: ". variable_get("site_name", "drupal") ."\r\nHost: $url[host]\r\nAccept: */*\r\n\r\n"); - - while (!feof($fp)) $data .= fgets($fp, 128); + while (!feof($fp)) { + $data .= fgets($fp, 128); + } // initialize the translation table: $tt = array_flip(get_html_translation_table(HTML_ENTITIES)); $tt["'"] = "'"; - if (strstr($data, "200 OK")) { - /* - ** Remove expired items: - */ - - db_query("DELETE FROM item WHERE fid = '$feed[fid]' AND timestamp < ". (time() - $feed[uncache])); + /* + ** Remove expired items: + */ + db_query("DELETE FROM item WHERE fid = '$feed[fid]' AND timestamp < ". (time() - $feed[uncache])); - /* - ** Remove unsupported tags or sub-elements: - */ + /* + ** Remove unsupported tags or sub-elements: + */ - $data = ereg_replace("", "", $data); - $data = ereg_replace("", "", $data); + $data = ereg_replace("", "", $data); + $data = ereg_replace("", "", $data); - /* - ** Extract and process channel information: - */ + /* + ** Extract and process channel information: + */ - $channel = ereg_replace("", "", $data); + $channel = ereg_replace("", "", $data); - eregi("(.*)", $channel, $title); - eregi("(.*)", $channel, $link); - eregi("(.*)", $channel, $description); + eregi("(.*)", $channel, $title); + eregi("(.*)", $channel, $link); + eregi("(.*)", $channel, $description); - db_query("UPDATE feed SET timestamp = '". time() ."', link = '". check_input($link[1]) ."', description = '". check_input($description[1]) ."' WHERE fid = '". $feed[fid] ."'"); + db_query("UPDATE feed SET timestamp = '". time() ."', link = '". check_input($link[1]) ."', description = '". check_input($description[1]) ."' WHERE fid = '". $feed[fid] ."'"); - /* - ** Extract and process individual items: - */ + /* + ** Extract and process individual items: + */ - eregi("", $data, $data); + eregi("", $data, $data); + // print "
". htmlentities($data[0]) ."
"; - // print "
". htmlentities($data[0]) ."
"; + $items = array_reverse(explode("", $data[0])); - $items = array_reverse(explode("", $data[0])); + foreach ($items as $item) { + unset($title, $link, $author, $description); - foreach ($items as $item) { - unset($title, $link, $author, $description); + $t = eregi("(.*)", $item, $title); + $l = eregi("(.*)", $item, $link); + $a = eregi("(.*)", $item, $author); + $d = eregi("(.*)", $item, $description); - $t = eregi("(.*)", $item, $title); - $l = eregi("(.*)", $item, $link); - $a = eregi("(.*)", $item, $author); - $d = eregi("(.*)", $item, $description); + if ($t || $l || $a || $d) { - if ($t || $l || $a || $d) { + /* + ** Strip invalid tags and provide default values (if required): + */ - /* - ** Strip invalid tags and provide default values (if required): - */ + $title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt)); + $link = $link[1] ? $link[1] : $feed[link]; + $description = strtr($description[1], $tt); - $title = strip_tags(strtr($title[1] ? $title[1] : substr(strip_tags(strtr($description[1], $tt)), 0, 30), $tt)); - $link = $link[1] ? $link[1] : $feed[link]; - $description = strtr($description[1], $tt); + // print "
title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."

"; - print "
title = ". htmlentities($title) ."\n\ndescription = ". htmlentities($description) ."\n\nlink = ". htmlentities($link) ."

"; + /* + ** Save this item: + */ - /* - ** Save this item: - */ - - import_save_item(array(fid => $feed[fid], title => $title, link => $link, author => $author[1], description => $description, attributes => $feed[attributes])); - } + import_save_item(array(fid => $feed[fid], title => $title, link => $link, author => $author[1], description => $description, attributes => $feed[attributes])); } } - else { - watchdog("warning", "failed to syndicate from '$feed[title]'". ($errstr ? ": $errstr" : "")); - } + } + else { + watchdog("warning", "failed to syndicate from '$feed[title]'". ($errstr ? ": $errstr" : "")); } return "feed '$feed[title]' updated."; @@ -273,8 +266,8 @@ function import_form_feed($edit = array()) { $period = array(900 => format_interval(900), 1800 => format_interval(1800), 3600 => format_interval(3600), 7200 => format_interval(7200), 10800 => format_interval(10800), 21600 => format_interval(21600), 32400 => format_interval(32400), 43200 => format_interval(43200), 64800 => format_interval(64800), 86400 => format_interval(86400), 172800 => format_interval(172800), 259200 => format_interval(259200), 604800 => format_interval(604800), 1209600 => format_interval(1209600), 2419200 => format_interval(2419200)); - if ($edit[refresh]== "") $edit[refresh] = 3600; - if ($edit[uncache]== "") $edit[uncache] = 2419200; + if ($edit[refresh] == "") $edit[refresh] = 3600; + if ($edit[uncache] == "") $edit[uncache] = 2419200; $form .= form_textfield("Title", "title", $edit[title], 50, 64, "The name of the feed; typically the name of the website you syndicate content from."); $form .= form_textfield("Url", "url", $edit[url], 50, 128, "The fully-qualified URL of the feed."); @@ -345,23 +338,68 @@ function import_view() { return $output; } -function import_view_feed() { - $result = db_query("SELECT * FROM feed ORDER BY title"); - while ($feed = db_fetch_object($result)) { - $output .= "$feed->title". ($feed->link ? " (". format_url($feed->link) .")" : "") ."
    ". check_output($feed->description) ."
"; +function import_fd_form() { + global $REQUEST_URI; + + $form .= form_textfield("Feed directory file", "url", "http://127.0.0.1/1.fd", 64, 128, "The fully-qualified URL of the feed directory file."); + $form .= form_submit("Collect feeds"); + + return form($REQUEST_URI, $form); +} + +function import_fd_collect($edit) { + global $REQUEST_URI; + + set_time_limit(180); + + if ($fp = fopen($edit[url], "r")) { + // fetch data: + while (!feof($fp)) { + $data .= fgets($fp, 128); + } + + // initialize the translation table: + $tt = array_flip(get_html_translation_table(HTML_ENTITIES)); + $tt["'"] = "'"; + + $items = explode("", $data); + + $output .= "
\n"; + + foreach ($items as $item) { + unset ($link, $title); + + // print "
item = ". htmlentities($item) ."\n\n
"; + + eregi("(.*)", $item, $link); + eregi("(.*)", $item, $title); + + $link = strip_tags(strtr($link[1], $tt)); + $title = strip_tags(strtr($title[1], $tt)); + + // print "title = $title, link = $link
"; + if ($link && $link && !db_fetch_array(db_query("SELECT * FROM feed WHERE url = '". check_input($link) ."'"))) { + $output .= " ". strtr($title, $tt) ."
"; + } + } + + $output .= "\n"; + $output .= "
"; + + return $output; + } + else { + print status("failed to open '$edit[url]': $errstr."); } - return $output; } -function import_view_bundle() { - $result = db_query("SELECT * FROM bundle ORDER BY title"); - while ($bundle = db_fetch_object($result)) { - $output .= "$bundle->title
    ". import_bundle_block($bundle->attributes) ."
"; +function import_fd_import($edit) { + foreach ($edit as $title => $link) { + import_save_feed(array("title" => $title, "url" => $link, "refresh" => 3600, "uncache" => 2419200)); } - return $output; } -function import_view_item() { +function import_tag() { global $REQUEST_URI; $result = db_query("SELECT i.*, f.title AS feed FROM item i LEFT JOIN feed f ON i.fid = f.fid ORDER BY i.iid DESC LIMIT 50"); @@ -384,7 +422,7 @@ function import_admin() { if (user_access("administer news feeds")) { - print "add new feed | add new bundle | available feeds | available bundles | available items | overview | help
"; + print "add new feed | add new bundle | import feeds | tag items | overview | help
"; switch($op) { case "help": @@ -402,6 +440,16 @@ function import_admin() { else print import_form_feed(import_get_feed($id)); break; + case "fd": + print import_fd_form(); + break; + case "Collect feeds": + print import_fd_collect($edit); + break; + case "Import feeds": + print import_fd_import($edit); + print import_view(); + break; case "remove": print status(import_remove(import_get_feed($id))); print import_view(); @@ -410,9 +458,12 @@ function import_admin() { print status(import_refresh(import_get_feed($id))); print import_view(); break; + case "tag": + print import_tag(); + break; case "Save attributes": print status(import_save_attributes($edit)); - print import_view_item(); + print import_tag(); break; case "Delete": $edit[title] = 0; @@ -426,14 +477,7 @@ function import_admin() { print import_view(); break; default: - if ($type == "bundle") - print import_view_bundle(); - else if ($type == "feed") - print import_view_feed(); - else if ($type == "item") - print import_view_item(); - else - print import_view(); + print import_view(); } } else { @@ -486,7 +530,7 @@ function import_page_feed($fid) { $header .= "

". t("Website") .":

". format_url($feed->link) ."

"; $header .= "

". t("Description") .":

". check_output($feed->description) ."

"; - $header .= "

". t("Last update") .":

". format_interval(time() - $feed->timestamp) ." ". t("ago") ."

"; + $header .= "

". t("Last update") .":

". format_interval(time() - $feed->timestamp) ." ". t("ago") ."url\">image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" />

\n"; $result = db_query("SELECT * FROM item WHERE fid = '". check_input($fid) ."' ORDER BY iid DESC LIMIT ". variable_get("import_page_limit", 75)); @@ -505,7 +549,6 @@ function import_page_feed($fid) { unset($links); } $output .= "\n"; - $output .= "url\">image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" />\n"; $theme->header(); $theme->box(check_output($feed->title), $header); @@ -555,17 +598,35 @@ function import_page_sources() { $result = db_query("SELECT * FROM feed ORDER BY title"); while ($feed = db_fetch_object($result)) { - $output .= "url\">image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" />\n"; $output .= format_url("module.php?mod=import&op=feed&id=$feed->fid", $feed->title); $output .= "

". check_output($feed->description) ."

"; } + $output .= "image("xml.gif") ."\" width=\"36\" height=\"14\" align=\"right\" border=\"0\" />
\n"; + $theme->header(); $theme->box(t("News feeds"), import_page_info()); $theme->box(t("News sources"), $output); $theme->footer(); } +function import_page_fd() { + + $result = db_query("SELECT * FROM feed ORDER BY title"); + + $output .= "\n\n"; + + while ($feed = db_fetch_object($result)) { + $output .= "\n"; + $output .= " ". htmlentities($feed->title) ."\n"; + $output .= " ". htmlentities($feed->url) ."\n"; + $output .= "\n\n"; + } + + + print $output; +} + function import_page_bundles() { import_page_blocks(import_get_bundles()); } @@ -622,6 +683,9 @@ function import_page() { case "sources": import_page_sources(); break; + case "fd": + import_page_fd(); + break; default: import_page_last(); } -- cgit v1.2.3