diff options
author | Andreas Gohr <andi@splitbrain.org> | 2005-09-02 22:12:30 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2005-09-02 22:12:30 +0200 |
commit | 94de3b7a8d1b188d29e7d94f9099db64c3d6cfad (patch) | |
tree | 7eb5228077fe2d0235393b9df76d2e73f38e7e6e | |
parent | 00a7b5ad8c432fcfb4f394ab300d802a0fe4c35c (diff) | |
download | rpg-94de3b7a8d1b188d29e7d94f9099db64c3d6cfad.tar.gz rpg-94de3b7a8d1b188d29e7d94f9099db64c3d6cfad.tar.bz2 |
HTTPClient is now used for RSS fetching
darcs-hash:20050902201230-7ad00-3446903b251caa6b0422603fc738b4d4ba872906.gz
-rw-r--r-- | conf/dokuwiki.php | 7 | ||||
-rw-r--r-- | inc/HTTPClient.php | 6 | ||||
-rw-r--r-- | inc/magpie/extlib/Snoopy.class.inc | 899 | ||||
-rw-r--r-- | inc/magpie/rss_fetch.inc | 44 |
4 files changed, 47 insertions, 909 deletions
diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php index 460f0a416..a264e8717 100644 --- a/conf/dokuwiki.php +++ b/conf/dokuwiki.php @@ -79,6 +79,13 @@ $conf['target']['extern'] = '_blank'; $conf['target']['media'] = ''; $conf['target']['windows'] = ''; +//Proxy setup - if your Server needs a proxy to access the web set these +$conf['proxy']['host'] = ''; +$conf['proxy']['port'] = ''; +$conf['proxy']['user'] = ''; +$conf['proxy']['pass'] = ''; +$conf['proxy']['ssl'] = ''; + /* Safemode Hack */ $conf['safemodehack'] = 0; //read http://wiki.splitbrain.org/wiki:safemodehack ! $conf['ftp']['host'] = 'localhost'; diff --git a/inc/HTTPClient.php b/inc/HTTPClient.php index 8167952c6..53bd941cd 100644 --- a/inc/HTTPClient.php +++ b/inc/HTTPClient.php @@ -35,7 +35,7 @@ class DokuHTTPClient extends HTTPClient { $this->proxy_port = $conf['proxy']['port']; $this->proxy_user = $conf['proxy']['user']; $this->proxy_pass = $conf['proxy']['pass']; - $this->proxy_ssl = $conf['proxy']['usessl']; + $this->proxy_ssl = $conf['proxy']['ssl']; } } @@ -140,6 +140,7 @@ class HTTPClient { */ function sendRequest($url,$data=array(),$method='GET'){ $this->error = ''; + $this->status = 0; // parse URL into bits $uri = parse_url($url); @@ -187,6 +188,7 @@ class HTTPClient { // open socket $socket = @fsockopen($server,$port,$errno, $errstr, $this->timeout); if (!$socket){ + $resp->status = '-100'; $this->error = "Could not connect to $server:$port\n$errstr ($errno)"; return $false; } @@ -287,7 +289,7 @@ class HTTPClient { $this->resp_headers['location']; } // perform redirected request, always via GET (required by RFC) - return $this->_sendRequest($this->resp_headers['location'],array(),'GET'); + return $this->sendRequest($this->resp_headers['location'],array(),'GET'); } } diff --git a/inc/magpie/extlib/Snoopy.class.inc b/inc/magpie/extlib/Snoopy.class.inc deleted file mode 100644 index 532ecba10..000000000 --- a/inc/magpie/extlib/Snoopy.class.inc +++ /dev/null @@ -1,899 +0,0 @@ -<?php - -/************************************************* - -Snoopy - the PHP net client -Author: Monte Ohrt <monte@ispi.net> -Copyright (c): 1999-2000 ispi, all rights reserved -Version: 1.0 - - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -You may contact the author of Snoopy by e-mail at: -monte@ispi.net - -Or, write to: -Monte Ohrt -CTO, ispi -237 S. 70th suite 220 -Lincoln, NE 68510 - -The latest version of Snoopy can be obtained from: -http://snoopy.sourceforge.com - -*************************************************/ - -class Snoopy -{ - /**** Public variables ****/ - - /* user definable vars */ - - var $host = "www.php.net"; // host name we are connecting to - var $port = 80; // port we are connecting to - var $proxy_host = ""; // proxy host to use - var $proxy_port = ""; // proxy port to use - var $agent = "Snoopy v1.0"; // agent we masquerade as - var $referer = ""; // referer info to pass - var $cookies = array(); // array of cookies to pass - // $cookies["username"]="joe"; - var $rawheaders = array(); // array of raw headers to send - // $rawheaders["Content-type"]="text/html"; - - var $maxredirs = 5; // http redirection depth maximum. 0 = disallow - var $lastredirectaddr = ""; // contains address of last redirected address - var $offsiteok = true; // allows redirection off-site - var $maxframes = 0; // frame content depth maximum. 0 = disallow - var $expandlinks = true; // expand links to fully qualified URLs. - // this only applies to fetchlinks() - // or submitlinks() - var $passcookies = true; // pass set cookies back through redirects - // NOTE: this currently does not respect - // dates, domains or paths. - - var $user = ""; // user for http authentication - var $pass = ""; // password for http authentication - - // http accept types - var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; - - var $results = ""; // where the content is put - - var $error = ""; // error messages sent here - var $response_code = ""; // response code returned from server - var $headers = array(); // headers returned from server sent here - var $maxlength = 500000; // max return data length (body) - var $read_timeout = 0; // timeout on read operations, in seconds - // supported only since PHP 4 Beta 4 - // set to 0 to disallow timeouts - var $timed_out = false; // if a read operation timed out - var $status = 0; // http request status - - var $curl_path = "/usr/bin/curl"; - // Snoopy will use cURL for fetching - // SSL content if a full system path to - // the cURL binary is supplied here. - // set to false if you do not have - // cURL installed. See http://curl.haxx.se - // for details on installing cURL. - // Snoopy does *not* use the cURL - // library functions built into php, - // as these functions are not stable - // as of this Snoopy release. - - // send Accept-encoding: gzip? - var $use_gzip = true; - - /**** Private variables ****/ - - var $_maxlinelen = 4096; // max line length (headers) - - var $_httpmethod = "GET"; // default http request method - var $_httpversion = "HTTP/1.0"; // default http request version - var $_submit_method = "POST"; // default submit method - var $_submit_type = "application/x-www-form-urlencoded"; // default submit type - var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type - var $_redirectaddr = false; // will be set if page fetched is a redirect - var $_redirectdepth = 0; // increments on an http redirect - var $_frameurls = array(); // frame src urls - var $_framedepth = 0; // increments on frame depth - - var $_isproxy = false; // set if using a proxy server - var $_fp_timeout = 30; // timeout for socket connection - -/*======================================================================*\ - Function: fetch - Purpose: fetch the contents of a web page - (and possibly other protocols in the - future like ftp, nntp, gopher, etc.) - Input: $URI the location of the page to fetch - Output: $this->results the output text from the fetch -\*======================================================================*/ - - function fetch($URI) - { - - //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS); - $URI_PARTS = parse_url($URI); - if (!empty($URI_PARTS["user"])) - $this->user = $URI_PARTS["user"]; - if (!empty($URI_PARTS["pass"])) - $this->pass = $URI_PARTS["pass"]; - - switch($URI_PARTS["scheme"]) - { - case "http": - $this->host = $URI_PARTS["host"]; - if(!empty($URI_PARTS["port"])) - $this->port = $URI_PARTS["port"]; - if($this->_connect($fp)) - { - if($this->_isproxy) - { - // using proxy, send entire URI - $this->_httprequest($URI,$fp,$URI,$this->_httpmethod); - } - else - { - $path = $URI_PARTS["path"].(isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : ""); - // no proxy, send only the path - $this->_httprequest($path, $fp, $URI, $this->_httpmethod); - } - - $this->_disconnect($fp); - - if($this->_redirectaddr) - { - /* url was redirected, check if we've hit the max depth */ - if($this->maxredirs > $this->_redirectdepth) - { - // only follow redirect if it's on this site, or offsiteok is true - if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) - { - /* follow the redirect */ - $this->_redirectdepth++; - $this->lastredirectaddr=$this->_redirectaddr; - $this->fetch($this->_redirectaddr); - } - } - } - - if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) - { - $frameurls = $this->_frameurls; - $this->_frameurls = array(); - - while(list(,$frameurl) = each($frameurls)) - { - if($this->_framedepth < $this->maxframes) - { - $this->fetch($frameurl); - $this->_framedepth++; - } - else - break; - } - } - } - else - { - return false; - } - return true; - break; - case "https": - if(!$this->curl_path || (!is_executable($this->curl_path))) { - $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n"; - return false; - } - $this->host = $URI_PARTS["host"]; - if(!empty($URI_PARTS["port"])) - $this->port = $URI_PARTS["port"]; - if($this->_isproxy) - { - // using proxy, send entire URI - $this->_httpsrequest($URI,$URI,$this->_httpmethod); - } - else - { - $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : ""); - // no proxy, send only the path - $this->_httpsrequest($path, $URI, $this->_httpmethod); - } - - if($this->_redirectaddr) - { - /* url was redirected, check if we've hit the max depth */ - if($this->maxredirs > $this->_redirectdepth) - { - // only follow redirect if it's on this site, or offsiteok is true - if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) - { - /* follow the redirect */ - $this->_redirectdepth++; - $this->lastredirectaddr=$this->_redirectaddr; - $this->fetch($this->_redirectaddr); - } - } - } - - if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) - { - $frameurls = $this->_frameurls; - $this->_frameurls = array(); - - while(list(,$frameurl) = each($frameurls)) - { - if($this->_framedepth < $this->maxframes) - { - $this->fetch($frameurl); - $this->_framedepth++; - } - else - break; - } - } - return true; - break; - default: - // not a valid protocol - $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n'; - return false; - break; - } - return true; - } - - - -/*======================================================================*\ - Private functions -\*======================================================================*/ - - -/*======================================================================*\ - Function: _striplinks - Purpose: strip the hyperlinks from an html document - Input: $document document to strip. - Output: $match an array of the links -\*======================================================================*/ - - function _striplinks($document) - { - preg_match_all("'<\s*a\s+.*href\s*=\s* # find <a href= - ([\"\'])? # find single or double quote - (?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching - # quote, otherwise match up to next space - 'isx",$document,$links); - - - // catenate the non-empty matches from the conditional subpattern - - while(list($key,$val) = each($links[2])) - { - if(!empty($val)) - $match[] = $val; - } - - while(list($key,$val) = each($links[3])) - { - if(!empty($val)) - $match[] = $val; - } - - // return the links - return $match; - } - -/*======================================================================*\ - Function: _stripform - Purpose: strip the form elements from an html document - Input: $document document to strip. - Output: $match an array of the links -\*======================================================================*/ - - function _stripform($document) - { - preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements); - - // catenate the matches - $match = implode("\r\n",$elements[0]); - - // return the links - return $match; - } - - - -/*======================================================================*\ - Function: _striptext - Purpose: strip the text from an html document - Input: $document document to strip. - Output: $text the resulting text -\*======================================================================*/ - - function _striptext($document) - { - - // I didn't use preg eval (//e) since that is only available in PHP 4.0. - // so, list your entities one by one here. I included some of the - // more common ones. - - $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript - "'<[\/\!]*?[^<>]*?>'si", // strip out html tags - "'([\r\n])[\s]+'", // strip out white space - "'&(quote|#34);'i", // replace html entities - "'&(amp|#38);'i", - "'&(lt|#60);'i", - "'&(gt|#62);'i", - "'&(nbsp|#160);'i", - "'&(iexcl|#161);'i", - "'&(cent|#162);'i", - "'&(pound|#163);'i", - "'&(copy|#169);'i" - ); - $replace = array( "", - "", - "\\1", - "\"", - "&", - "<", - ">", - " ", - chr(161), - chr(162), - chr(163), - chr(169)); - - $text = preg_replace($search,$replace,$document); - - return $text; - } - -/*======================================================================*\ - Function: _expandlinks - Purpose: expand each link into a fully qualified URL - Input: $links the links to qualify - $URI the full URI to get the base from - Output: $expandedLinks the expanded links -\*======================================================================*/ - - function _expandlinks($links,$URI) - { - - preg_match("/^[^\?]+/",$URI,$match); - - $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]); - - $search = array( "|^http://".preg_quote($this->host)."|i", - "|^(?!http://)(\/)?(?!mailto:)|i", - "|/\./|", - "|/[^\/]+/\.\./|" - ); - - $replace = array( "", - $match."/", - "/", - "/" - ); - - $expandedLinks = preg_replace($search,$replace,$links); - - return $expandedLinks; - } - -/*======================================================================*\ - Function: _httprequest - Purpose: go get the http data from the server - Input: $url the url to fetch - $fp the current open file pointer - $URI the full URI - $body body contents to send if any (POST) - Output: -\*======================================================================*/ - - function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="") - { - if($this->passcookies && $this->_redirectaddr) - $this->setcookies(); - - $URI_PARTS = parse_url($URI); - if(empty($url)) - $url = "/"; - $headers = $http_method." ".$url." ".$this->_httpversion."\r\n"; - if(!empty($this->agent)) - $headers .= "User-Agent: ".$this->agent."\r\n"; - if(!empty($this->host) && !isset($this->rawheaders['Host'])) - $headers .= "Host: ".$this->host."\r\n"; - if(!empty($this->accept)) - $headers .= "Accept: ".$this->accept."\r\n"; - - if($this->use_gzip) { - // make sure PHP was built with --with-zlib - // and we can handle gzipp'ed data - if ( function_exists(gzinflate) ) { - $headers .= "Accept-encoding: gzip\r\n"; - } - else { - trigger_error( - "use_gzip is on, but PHP was built without zlib support.". - " Requesting file(s) without gzip encoding.", - E_USER_NOTICE); - } - } - - if(!empty($this->referer)) - $headers .= "Referer: ".$this->referer."\r\n"; - if(!empty($this->cookies)) - { - if(!is_array($this->cookies)) - $this->cookies = (array)$this->cookies; - - reset($this->cookies); - if ( count($this->cookies) > 0 ) { - $cookie_headers .= 'Cookie: '; - foreach ( $this->cookies as $cookieKey => $cookieVal ) { - $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; "; - } - $headers .= substr($cookie_headers,0,-2) . "\r\n"; - } - } - if(!empty($this->rawheaders)) - { - if(!is_array($this->rawheaders)) - $this->rawheaders = (array)$this->rawheaders; - while(list($headerKey,$headerVal) = each($this->rawheaders)) - $headers .= $headerKey.": ".$headerVal."\r\n"; - } - if(!empty($content_type)) { - $headers .= "Content-type: $content_type"; - if ($content_type == "multipart/form-data") - $headers .= "; boundary=".$this->_mime_boundary; - $headers .= "\r\n"; - } - if(!empty($body)) - $headers .= "Content-length: ".strlen($body)."\r\n"; - if(!empty($this->user) || !empty($this->pass)) - $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n"; - - $headers .= "\r\n"; - - // set the read timeout if needed - if ($this->read_timeout > 0) - socket_set_timeout($fp, $this->read_timeout); - $this->timed_out = false; - - fwrite($fp,$headers.$body,strlen($headers.$body)); - - $this->_redirectaddr = false; - unset($this->headers); - - // content was returned gzip encoded? - $is_gzipped = false; - - while($currentHeader = fgets($fp,$this->_maxlinelen)) - { - if ($this->read_timeout > 0 && $this->_check_timeout($fp)) - { - $this->status=-100; - return false; - } - - // if($currentHeader == "\r\n") - if(preg_match("/^\r?\n$/", $currentHeader) ) - break; - - // if a header begins with Location: or URI:, set the redirect - if(preg_match("/^(Location:|URI:)/i",$currentHeader)) - { - // get URL portion of the redirect - preg_match("/^(Location:|URI:)\s+(.*)/",chop($currentHeader),$matches); - // look for :// in the Location header to see if hostname is included - if(!preg_match("|\:\/\/|",$matches[2])) - { - // no host in the path, so prepend - $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port; - // eliminate double slash - if(!preg_match("|^/|",$matches[2])) - $this->_redirectaddr .= "/".$matches[2]; - else - $this->_redirectaddr .= $matches[2]; - } - else - $this->_redirectaddr = $matches[2]; - } - - if(preg_match("|^HTTP/|",$currentHeader)) - { - if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status)) - { - $this->status= $status[1]; - } - $this->response_code = $currentHeader; - } - - if (preg_match("/Content-Encoding: gzip/", $currentHeader) ) { - $is_gzipped = true; - } - - $this->headers[] = $currentHeader; - } - - # $results = fread($fp, $this->maxlength); - $results = ""; - while ( $data = fread($fp, $this->maxlength) ) { - $results .= $data; - if ( - strlen($results) > $this->maxlength ) { - break; - } - } - - // gunzip - if ( $is_gzipped ) { - // per http://www.php.net/manual/en/function.gzencode.php - $results = substr($results, 10); - $results = gzinflate($results); - } - - if ($this->read_timeout > 0 && $this->_check_timeout($fp)) - { - $this->status=-100; - return false; - } - - // check if there is a a redirect meta tag - - if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) - { - $this->_redirectaddr = $this->_expandlinks($match[1],$URI); - } - - // have we hit our frame depth and is there frame src to fetch? - if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match)) - { - $this->results[] = $results; - for($x=0; $x<count($match[1]); $x++) - $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host); - } - // have we already fetched framed content? - elseif(is_array($this->results)) - $this->results[] = $results; - // no framed content - else - $this->results = $results; - - return true; - } - -/*======================================================================*\ - Function: _httpsrequest - Purpose: go get the https data from the server using curl - Input: $url the url to fetch - $URI the full URI - $body body contents to send if any (POST) - Output: -\*======================================================================*/ - - function _httpsrequest($url,$URI,$http_method,$content_type="",$body="") - { - if($this->passcookies && $this->_redirectaddr) - $this->setcookies(); - - $headers = array(); - - $URI_PARTS = parse_url($URI); - if(empty($url)) - $url = "/"; - // GET ... header not needed for curl - //$headers[] = $http_method." ".$url." ".$this->_httpversion; - if(!empty($this->agent)) - $headers[] = "User-Agent: ".$this->agent; - if(!empty($this->host)) - $headers[] = "Host: ".$this->host; - if(!empty($this->accept)) - $headers[] = "Accept: ".$this->accept; - if(!empty($this->referer)) - $headers[] = "Referer: ".$this->referer; - if(!empty($this->cookies)) - { - if(!is_array($this->cookies)) - $this->cookies = (array)$this->cookies; - - reset($this->cookies); - if ( count($this->cookies) > 0 ) { - $cookie_str = 'Cookie: '; - foreach ( $this->cookies as $cookieKey => $cookieVal ) { - $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; "; - } - $headers[] = substr($cookie_str,0,-2); - } - } - if(!empty($this->rawheaders)) - { - if(!is_array($this->rawheaders)) - $this->rawheaders = (array)$this->rawheaders; - while(list($headerKey,$headerVal) = each($this->rawheaders)) - $headers[] = $headerKey.": ".$headerVal; - } - if(!empty($content_type)) { - if ($content_type == "multipart/form-data") - $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary; - else - $headers[] = "Content-type: $content_type"; - } - if(!empty($body)) - $headers[] = "Content-length: ".strlen($body); - if(!empty($this->user) || !empty($this->pass)) - $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass); - - for($curr_header = 0; $curr_header < count($headers); $curr_header++) - $cmdline_params .= " -H \"".$headers[$curr_header]."\""; - - if(!empty($body)) - $cmdline_params .= " -d \"$body\""; - - if($this->read_timeout > 0) - $cmdline_params .= " -m ".$this->read_timeout; - - $headerfile = uniqid(time()); - - # accept self-signed certs - $cmdline_params .= " -k"; - exec($this->curl_path." -D \"/tmp/$headerfile\"".$cmdline_params." ".$URI,$results,$return); - - if($return) - { - $this->error = "Error: cURL could not retrieve the document, error $return."; - return false; - } - - - $results = implode("\r\n",$results); - - $result_headers = file("/tmp/$headerfile"); - - $this->_redirectaddr = false; - unset($this->headers); - - for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++) - { - - // if a header begins with Location: or URI:, set the redirect - if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader])) - { - // get URL portion of the redirect - preg_match("/^(Location: |URI:)(.*)/",chop($result_headers[$currentHeader]),$matches); - // look for :// in the Location header to see if hostname is included - if(!preg_match("|\:\/\/|",$matches[2])) - { - // no host in the path, so prepend - $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port; - // eliminate double slash - if(!preg_match("|^/|",$matches[2])) - $this->_redirectaddr .= "/".$matches[2]; - else - $this->_redirectaddr .= $matches[2]; - } - else - $this->_redirectaddr = $matches[2]; - } - - if(preg_match("|^HTTP/|",$result_headers[$currentHeader])) - { - $this->response_code = $result_headers[$currentHeader]; - if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match)) - { - $this->status= $match[1]; - } - } - $this->headers[] = $result_headers[$currentHeader]; - } - - // check if there is a a redirect meta tag - - if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) - { - $this->_redirectaddr = $this->_expandlinks($match[1],$URI); - } - - // have we hit our frame depth and is there frame src to fetch? - if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match)) - { - $this->results[] = $results; - for($x=0; $x<count($match[1]); $x++) - $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host); - } - // have we already fetched framed content? - elseif(is_array($this->results)) - $this->results[] = $results; - // no framed content - else - $this->results = $results; - - unlink("/tmp/$headerfile"); - - return true; - } - -/*======================================================================*\ - Function: setcookies() - Purpose: set cookies for a redirection -\*======================================================================*/ - - function setcookies() - { - for($x=0; $x<count($this->headers); $x++) - { - if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $this->headers[$x],$match)) - $this->cookies[$match[1]] = $match[2]; - } - } - - -/*======================================================================*\ - Function: _check_timeout - Purpose: checks whether timeout has occurred - Input: $fp file pointer -\*======================================================================*/ - - function _check_timeout($fp) - { - if ($this->read_timeout > 0) { - $fp_status = socket_get_status($fp); - if ($fp_status["timed_out"]) { - $this->timed_out = true; - return true; - } - } - return false; - } - -/*======================================================================*\ - Function: _connect - Purpose: make a socket connection - Input: $fp file pointer -\*======================================================================*/ - - function _connect(&$fp) - { - if(!empty($this->proxy_host) && !empty($this->proxy_port)) - { - $this->_isproxy = true; - $host = $this->proxy_host; - $port = $this->proxy_port; - } - else - { - $host = $this->host; - $port = $this->port; - } - - $this->status = 0; - - if($fp = fsockopen( - $host, - $port, - $errno, - $errstr, - $this->_fp_timeout - )) - { - // socket connection succeeded - - return true; - } - else - { - // socket connection failed - $this->status = $errno; - switch($errno) - { - case -3: - $this->error="socket creation failed (-3)"; - case -4: - $this->error="dns lookup failure (-4)"; - case -5: - $this->error="connection refused or timed out (-5)"; - default: - $this->error="connection failed (".$errno.")"; - } - return false; - } - } -/*======================================================================*\ - Function: _disconnect - Purpose: disconnect a socket connection - Input: $fp file pointer -\*======================================================================*/ - - function _disconnect($fp) - { - return(fclose($fp)); - } - - -/*======================================================================*\ - Function: _prepare_post_body - Purpose: Prepare post body according to encoding type - Input: $formvars - form variables - $formfiles - form upload files - Output: post body -\*======================================================================*/ - - function _prepare_post_body($formvars, $formfiles) - { - settype($formvars, "array"); - settype($formfiles, "array"); - - if (count($formvars) == 0 && count($formfiles) == 0) - return; - - switch ($this->_submit_type) { - case "application/x-www-form-urlencoded": - reset($formvars); - while(list($key,$val) = each($formvars)) { - if (is_array($val) || is_object($val)) { - while (list($cur_key, $cur_val) = each($val)) { - $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&"; - } - } else - $postdata .= urlencode($key)."=".urlencode($val)."&"; - } - break; - - case "multipart/form-data": - $this->_mime_boundary = "Snoopy".md5(uniqid(microtime())); - - reset($formvars); - while(list($key,$val) = each($formvars)) { - if (is_array($val) || is_object($val)) { - while (list($cur_key, $cur_val) = each($val)) { - $postdata .= "--".$this->_mime_boundary."\r\n"; - $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n"; - $postdata .= "$cur_val\r\n"; - } - } else { - $postdata .= "--".$this->_mime_boundary."\r\n"; - $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n"; - $postdata .= "$val\r\n"; - } - } - - reset($formfiles); - while (list($field_name, $file_names) = each($formfiles)) { - settype($file_names, "array"); - while (list(, $file_name) = each($file_names)) { - if (!is_readable($file_name)) continue; - - $fp = fopen($file_name, "r"); - $file_content = fread($fp, filesize($file_name)); - fclose($fp); - $base_name = basename($file_name); - - $postdata .= "--".$this->_mime_boundary."\r\n"; - $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n"; - $postdata .= "$file_content\r\n"; - } - } - $postdata .= "--".$this->_mime_boundary."--\r\n"; - break; - } - - return $postdata; - } -} - -?> diff --git a/inc/magpie/rss_fetch.inc b/inc/magpie/rss_fetch.inc index c1098d681..a9ed2cc13 100644 --- a/inc/magpie/rss_fetch.inc +++ b/inc/magpie/rss_fetch.inc @@ -14,6 +14,10 @@ * Magpie mailing list: * magpierss-general@lists.sourceforge.net * + * This file was modified for DokuWiki to use it's own HTTPClient lib + * instead of the Snoopy lib delivered with Magpie. Search for DokuWiki + * to find the modifications. Original code was commented out. + * */ // Setup MAGPIE_DIR for use on hosts that don't include @@ -30,9 +34,13 @@ if (!defined('MAGPIE_DIR')) { require_once( MAGPIE_DIR . 'rss_parse.inc' ); require_once( MAGPIE_DIR . 'rss_cache.inc' ); + +//DokuWiki changes // for including 3rd party libraries -define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); -require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); +//define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); +// require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/'); +require_once(DOKU_INC.'inc/HTTPClient.php'); /* @@ -262,9 +270,20 @@ function magpie_error ($errormsg="") { Purpose: retrieve an arbitrary remote file Input: url of the remote file headers to send along with the request (optional) - Output: an HTTP response object (see Snoopy.class.inc) + Output: an HTTPClient response object \*=======================================================================*/ function _fetch_remote_file ($url, $headers = "" ) { + // We use DokuWiki's HTTPClient here + + $client = new DokuHTTPClient(); + $client->timeout = MAGPIE_FETCH_TIME_OUT; + if (is_array($headers) ) { + $client->headers = $headers; + } + $client->get($url); + return $client; + +/* // Snoopy is an HTTP client in PHP $client = new Snoopy(); $client->agent = MAGPIE_USER_AGENT; @@ -276,22 +295,31 @@ function _fetch_remote_file ($url, $headers = "" ) { @$client->fetch($url); return $client; - +*/ } /*=======================================================================*\ Function: _response_to_rss Purpose: parse an HTTP response object into an RSS object - Input: an HTTP response object (see Snoopy) + Input: an HTTP response object, DokuWiki HTTPClient Output: parsed RSS object (see rss_parse) \*=======================================================================*/ function _response_to_rss ($resp) { - $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING ); +//DokuWiki: +# $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING ); + $rss = new MagpieRSS( $resp->resp_body, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING ); // if RSS parsed successfully if ( $rss and !$rss->ERROR) { - // find Etag, and Last-Modified + if($resp->resp_headers['etag']){ + $rss->etag = $val; + } + if($resp->resp_headers['last-modified']){ + $rss->last_modified = $val; + } + +/* foreach($resp->headers as $h) { // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1" if (strpos($h, ": ")) { @@ -310,7 +338,7 @@ function _response_to_rss ($resp) { $rss->last_modified = $val; } } - +*/ return $rss; } // else construct error message else { |