From 894a80cc56d188b10cc78bb4c088bea731f991a2 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Tue, 17 Oct 2006 19:53:29 +0200 Subject: restrict fetch.php's download abilities This patch changes fetch.php's ability to download external files. It now checks for the returned MIME type and will only download images. For all other MIME types a redirect is sent back to the browser. This reduces the risk of being misused as an open proxy. Additionally the download facility is disabled completely by default by setting the fetchsize option to 0. Users who want the feature need to overwrite the option in their local.php. Background: The ability to download external files is needed to resize external images on the server side. When disabled, a redirect is sent to the browser which will download the fullsize image and rescale it on the client side which is more bandwidth and CPU intensive. darcs-hash:20061017175329-7ad00-cd1b1bfa043a04540c51ca8380d28deaa14147d1.gz --- conf/dokuwiki.php | 2 +- inc/HTTPClient.php | 90 +++++++++++++++++++++++++++++------------------------- install.php | 2 +- lib/exe/fetch.php | 26 +++++++++++++++- 4 files changed, 76 insertions(+), 44 deletions(-) diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php index 9403de176..f3fd8286a 100644 --- a/conf/dokuwiki.php +++ b/conf/dokuwiki.php @@ -80,7 +80,7 @@ $conf['compression'] = 'gz'; //compress old revisions: (0: off) ('gz // bz2 generates smaller files, but needs more cpu-power $conf['cachetime'] = 60*60*24; //maximum age for cachefile in seconds (defaults to a day) $conf['locktime'] = 15*60; //maximum age for lockfiles (defaults to 15 minutes) -$conf['fetchsize'] = 2*1024*1024; //maximum size (bytes) fetch.php may download from extern +$conf['fetchsize'] = 0; //maximum size (bytes) fetch.php may download from extern, disabled by default $conf['notify'] = ''; //send change info to this email (leave blank for nobody) $conf['registernotify'] = ''; //send info about newly registered users to this email (leave 
blank for nobody) $conf['mailfrom'] = ''; //use this email when sending mails diff --git a/inc/HTTPClient.php b/inc/HTTPClient.php index 1612f1211..e42db26c4 100644 --- a/inc/HTTPClient.php +++ b/inc/HTTPClient.php @@ -59,6 +59,7 @@ class HTTPClient { var $referer; var $max_redirect; var $max_bodysize; // abort if the response body is bigger than this + var $header_regexp; // if set this RE must match against the headers, else abort var $headers; var $debug; @@ -99,6 +100,7 @@ class HTTPClient { $this->http = '1.0'; $this->debug = false; $this->max_bodysize = 0; + $this->header_regexp= ''; if(extension_loaded('zlib')) $this->headers['Accept-encoding'] = 'gzip'; $this->headers['Accept'] = 'text/xml,application/xml,application/xhtml+xml,'. 'text/html,text/plain,image/png,image/jpeg,image/gif,*/*'; @@ -227,14 +229,60 @@ class HTTPClient { $r_headers .= fread($socket,1); #FIXME read full lines here? }while(!preg_match('/\r\n\r\n$/',$r_headers)); + $this->_debug('response headers',$r_headers); + // check if expected body size exceeds allowance - if($this->max_bodysize && preg_match('/\r\nContent-Length:\s*(\d+)\r\n/i',$r_header,$match)){ + if($this->max_bodysize && preg_match('/\r\nContent-Length:\s*(\d+)\r\n/i',$r_headers,$match)){ if($match[1] > $this->max_bodysize){ $this->error = 'Reported content length exceeds allowed response size'; return false; } } + // get Status + if (!preg_match('/^HTTP\/(\d\.\d)\s*(\d+).*?\n/', $r_headers, $m)) { + $this->error = 'Server returned bad answer'; + return false; + } + $this->status = $m[2]; + + // handle headers and cookies + $this->resp_headers = $this->_parseHeaders($r_headers); + if(isset($this->resp_headers['set-cookie'])){ + foreach ($this->resp_headers['set-cookie'] as $c){ + list($key, $value, $foo) = split('=', $cookie); + $this->cookies[$key] = $value; + } + } + + $this->_debug('Object headers',$this->resp_headers); + + // check server status code to follow redirect + if($this->status == 301 || $this->status == 302 ){ 
+ if (empty($this->resp_headers['location'])){ + $this->error = 'Redirect but no Location Header found'; + return false; + }elseif($this->redirect_count == $this->max_redirect){ + $this->error = 'Maximum number of redirects exceeded'; + return false; + }else{ + $this->redirect_count++; + $this->referer = $url; + if (!preg_match('/^http/i', $this->resp_headers['location'])){ + $this->resp_headers['location'] = $uri['scheme'].'://'.$uri['host']. + $this->resp_headers['location']; + } + // perform redirected request, always via GET (required by RFC) + return $this->sendRequest($this->resp_headers['location'],array(),'GET'); + } + } + + // check if headers are as expected + if($this->header_regexp && !preg_match($this->header_regexp,$r_headers)){ + $this->error = 'The received headers did not match the given regexp'; + return false; + } + //read body (with chunked encoding if needed) $r_body = ''; if(preg_match('/transfer\-(en)?coding:\s*chunked\r\n/i',$r_header)){ @@ -285,46 +333,6 @@ class HTTPClient { $status = socket_get_status($socket); fclose($socket); - $this->_debug('response headers',$r_headers); - - // get Status - if (!preg_match('/^HTTP\/(\d\.\d)\s*(\d+).*?\n/', $r_headers, $m)) { - $this->error = 'Server returned bad answer'; - return false; - } - $this->status = $m[2]; - - // handle headers and cookies - $this->resp_headers = $this->_parseHeaders($r_headers); - if(isset($this->resp_headers['set-cookie'])){ - foreach ($this->resp_headers['set-cookie'] as $c){ - list($key, $value, $foo) = split('=', $cookie); - $this->cookies[$key] = $value; - } - } - - $this->_debug('Object headers',$this->resp_headers); - - // check server status code to follow redirect - if($this->status == 301 || $this->status == 302 ){ - if (empty($this->resp_headers['location'])){ - $this->error = 'Redirect but no Location Header found'; - return false; - }elseif($this->redirect_count == $this->max_redirect){ - $this->error = 'Maximum number of redirects exceeded'; - return false; - 
}else{ - $this->redirect_count++; - $this->referer = $url; - if (!preg_match('/^http/i', $this->resp_headers['location'])){ - $this->resp_headers['location'] = $uri['scheme'].'://'.$uri['host']. - $this->resp_headers['location']; - } - // perform redirected request, always via GET (required by RFC) - return $this->sendRequest($this->resp_headers['location'],array(),'GET'); - } - } - // decode gzip if needed if($this->resp_headers['content-encoding'] == 'gzip'){ $this->resp_body = gzinflate(substr($r_body, 10)); diff --git a/install.php b/install.php index b0d2bc7fd..e7d979d2e 100644 --- a/install.php +++ b/install.php @@ -40,7 +40,7 @@ $dokuwiki_hash = array( '2005-09-22' => 'e33223e957b0b0a130d0520db08f8fb7', '2006-03-05' => '51295727f79ab9af309a2fd9e0b61acc', '2006-03-09' => '51295727f79ab9af309a2fd9e0b61acc', - 'rc2006-10-08' => 'c795599093bcde8765ed5df6c8dd7c4b', + 'rc2006-10-08' => 'b3a8af76845977c2000d85d6990dd72b', ); diff --git a/lib/exe/fetch.php b/lib/exe/fetch.php index f33f7b0cc..1f854b338 100644 --- a/lib/exe/fetch.php +++ b/lib/exe/fetch.php @@ -237,7 +237,7 @@ function get_from_URL($url,$ext,$cache){ if( ($mtime == 0) || // cache does not exist ($cache != -1 && $mtime < time()-$cache) // 'recache' and cache has expired ){ - if(io_download($url,$local,false,'',$conf['fetchsize'])){ + if(image_download($url,$local)){ return $local; }else{ return false; @@ -251,6 +251,30 @@ function get_from_URL($url,$ext,$cache){ return false; } +/** + * Download image files + * + * @author Andreas Gohr + */ +function image_download($url,$file){ + global $conf; + $http = new DokuHTTPClient(); + $http->max_bodysize = $conf['fetchsize']; + $http->timeout = 25; //max. 
25 sec + $http->header_regexp = '!\r\nContent-Type: image/(jpe?g|gif|png)!i'; + + $data = $http->get($url); + if(!$data) return false; + + $fileexists = @file_exists($file); + $fp = @fopen($file,"w"); + if(!$fp) return false; + fwrite($fp,$data); + fclose($fp); + if(!$fileexists and $conf['fperm']) chmod($file, $conf['fperm']); + return true; +} + /** * resize images using external ImageMagick convert program * -- cgit v1.2.3