From c4f79b71351dd0d96f19f7c5629888d85a814c72 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Wed, 7 Apr 2010 11:31:50 +0200 Subject: Sitemap rewrite --- inc/actions.php | 57 +++++++++++++++++++++++++++-- inc/common.php | 15 ++++++++ inc/sitemap.php | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/exe/indexer.php | 98 ++------------------------------------------------ 4 files changed, 174 insertions(+), 97 deletions(-) create mode 100644 inc/sitemap.php diff --git a/inc/actions.php b/inc/actions.php index 3e0cb1207..2d70ac8ed 100644 --- a/inc/actions.php +++ b/inc/actions.php @@ -56,6 +56,10 @@ function act_dispatch(){ //check permissions $ACT = act_permcheck($ACT); + //sitemap + if ($ACT == 'sitemap') + $ACT = act_sitemap($ACT); + //register $nil = array(); if($ACT == 'register' && $_POST['save'] && register()){ @@ -205,7 +209,7 @@ function act_clean($act){ 'preview','search','show','check','index','revisions', 'diff','recent','backlink','admin','subscribe','revert', 'unsubscribe','profile','resendpwd','recover', - 'draftdel','subscribens','unsubscribens',)) && substr($act,0,7) != 'export_' ) { + 'draftdel','subscribens','unsubscribens','sitemap')) && substr($act,0,7) != 'export_' ) { msg('Command unknown: '.htmlspecialchars($act),-1); return 'show'; } @@ -233,7 +237,8 @@ function act_permcheck($act){ }else{ $permneed = AUTH_CREATE; } - }elseif(in_array($act,array('login','search','recent','profile','index'))){ + }elseif(in_array($act,array('login','search','recent','profile','index', 'sitemap'))){ + }elseif(in_array($act,array('login','search','recent','profile','sitemap'))){ $permneed = AUTH_NONE; }elseif($act == 'revert'){ $permneed = AUTH_ADMIN; @@ -586,6 +591,54 @@ function act_export($act){ return 'show'; } +/** + * Handle sitemap delivery + * + * @author Michael Hamann + */ +function act_sitemap($act) { + global $conf; + + if (!$conf['sitemap']) { + header("HTTP/1.0 404 Not Found"); + print "Sitemap generation is disabled."; + exit; + } + + $sitemap = $conf['cachedir'].'/sitemap.xml'; + if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ + $mime = 'application/x-gzip'; + $sitemap .= '.gz'; + } else { + $mime = 'application/xml; charset=utf-8'; + } + + // Check if sitemap file exists, otherwise create it + if (!is_readable($sitemap)) { + require_once DOKU_INC.'inc/sitemap.php'; + sitemapGenerate(); + } + + if (is_readable($sitemap)) { + // Send headers + header('Content-Type: '.$mime); + + // Send file + //use x-sendfile header to pass the delivery to compatible webservers + if (http_sendfile($sitemap)) exit; + + $fp = @fopen($sitemap,"rb"); + if($fp){ + http_rangeRequest($fp,filesize($sitemap),$mime); + exit; + } + } + + header("HTTP/1.0 500 Internal Server Error"); + print "Could not read $sitemap - bad permissions?"; + exit; +} + /** * Handle page 'subscribe' * diff --git a/inc/common.php b/inc/common.php index bf5987c28..0816d9fbb 100644 --- a/inc/common.php +++ b/inc/common.php @@ -1266,6 +1266,21 @@ function dformat($dt=null,$format=''){ return strftime($format,$dt); } +/** + * Formats a timestamp as ISO 8601 date + * + * @author + * @link http://www.php.net/manual/en/function.date.php#54072 + */ +function date_iso8601($int_date) { + //$int_date: current date in UNIX timestamp + $date_mod = date('Y-m-d\TH:i:s', $int_date); + $pre_timezone = date('O', $int_date); + $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2); + $date_mod .= $time_zone; + return $date_mod; +} + /** * return an obfuscated email address in line with $conf['mailguard'] setting * diff --git a/inc/sitemap.php b/inc/sitemap.php new file mode 100644 index 000000000..bbed7d269 --- /dev/null +++ b/inc/sitemap.php @@ -0,0 +1,101 @@ + + */ + +if(!defined('DOKU_INC')) die('meh.'); + +/** + * Builds a Google Sitemap of all public pages known to the indexer + * + * The map is placed in the cache directory named sitemap.xml.gz - This + * file needs to be writable! + * + * @author Andreas Gohr + * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html + */ +function sitemapGenerate(){ + global $conf; + dbglog('sitemapGenerate(): started'); + if(!$conf['sitemap']) return false; + + $sitemap = sitemapGetFilePath(); + dbglog("runSitemapper(): using $sitemap"); + + if(@file_exists($sitemap)){ + if(!is_writable($sitemap)) return false; + }else{ + if(!is_writable(dirname($sitemap))) return false; + } + + if(@filesize($sitemap) && + @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ + dbglog('runSitemapper(): Sitemap up to date'); + return false; + } + + $pages = idx_getIndex('page', ''); + dbglog('runSitemapper(): creating sitemap using '.count($pages).' pages'); + + // build the sitemap + ob_start(); + print ''.NL; + print ''.NL; + foreach($pages as $id){ + $id = trim($id); + $file = wikiFN($id); + + //skip hidden, non existing and restricted files + if(isHiddenPage($id)) continue; + $date = @filemtime($file); + if(!$date) continue; + if(auth_aclcheck($id,'','') < AUTH_READ) continue; + + print ' '.NL; + print ' '.wl($id,'',true).''.NL; + print ' '.date_iso8601($date).''.NL; + print ' '.NL; + } + print ''.NL; + $data = ob_get_contents(); + ob_end_clean(); + + //save the new sitemap + return io_saveFile($sitemap,$data); +} + +function sitemapGetFilePath() { + global $conf; + + $sitemap = $conf['cachedir'].'/sitemap.xml'; + if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ + $sitemap .= '.gz'; + } + + return $sitemap; +} + +function sitemapPingSearchEngines() { + //ping search engines... + $http = new DokuHTTPClient(); + $http->timeout = 8; + + $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&')); + $ping_urls = array( + 'google' => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url, + 'yahoo' => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='.$encoded_sitemap_url, + 'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url, + ); + + foreach ($ping_urls as $name => $url) { + dbglog("sitemapPingSearchEngines(): pinging $name"); + $resp = $http->get($url); + if($http->error) dbglog("runSitemapper(): $http->error"); + dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); + } + + return true; +} diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php index f8e2f7981..63ad5931f 100644 --- a/lib/exe/indexer.php +++ b/lib/exe/indexer.php @@ -232,88 +232,11 @@ function metaUpdate(){ * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html */ function runSitemapper(){ - global $conf; print "runSitemapper(): started".NL; - if(!$conf['sitemap']) return false; - - if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ - $sitemap = 'sitemap.xml.gz'; - }else{ - $sitemap = 'sitemap.xml'; - } - print "runSitemapper(): using $sitemap".NL; - - if(@file_exists(DOKU_INC.$sitemap)){ - if(!is_writable(DOKU_INC.$sitemap)) return false; - }else{ - if(!is_writable(DOKU_INC)) return false; - } - - if(@filesize(DOKU_INC.$sitemap) && - @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){ - print 'runSitemapper(): Sitemap up to date'.NL; - return false; - } - - $pages = idx_getIndex('page', ''); - print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL; - - // build the sitemap - ob_start(); - print ''.NL; - print ''.NL; - foreach($pages as $id){ - $id = trim($id); - $file = wikiFN($id); - - //skip hidden, non existing and restricted files - if(isHiddenPage($id)) continue; - $date = @filemtime($file); - if(!$date) continue; - if(auth_aclcheck($id,'','') < AUTH_READ) continue; - - print ' '.NL; - print ' '.wl($id,'',true).''.NL; - print ' '.date_iso8601($date).''.NL; - print ' '.NL; - } - print ''.NL; - $data = ob_get_contents(); - ob_end_clean(); - - //save the new sitemap - io_saveFile(DOKU_INC.$sitemap,$data); - - //ping search engines... - $http = new DokuHTTPClient(); - $http->timeout = 8; - - //ping google - print 'runSitemapper(): pinging google'.NL; - $url = 'http://www.google.com/webmasters/sitemaps/ping?sitemap='; - $url .= urlencode(DOKU_URL.$sitemap); - $resp = $http->get($url); - if($http->error) print 'runSitemapper(): '.$http->error.NL; - print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL; - - //ping yahoo - print 'runSitemapper(): pinging yahoo'.NL; - $url = 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='; - $url .= urlencode(DOKU_URL.$sitemap); - $resp = $http->get($url); - if($http->error) print 'runSitemapper(): '.$http->error.NL; - print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL; - - //ping microsoft - print 'runSitemapper(): pinging microsoft'.NL; - $url = 'http://www.bing.com/webmaster/ping.aspx?siteMap='; - $url .= urlencode(DOKU_URL.$sitemap); - $resp = $http->get($url); - if($http->error) print 'runSitemapper(): '.$http->error.NL; - print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL; - + require_once DOKU_INC.'inc/sitemap.php'; + $result = sitemapGenerate() && sitemapPingSearchEngines(); print 'runSitemapper(): finished'.NL; - return true; + return $result; } /** @@ -408,21 +331,6 @@ function sendDigest() { $_SERVER['REMOTE_USER'] = $olduser; } -/** - * Formats a timestamp as ISO 8601 date - * - * @author - * @link http://www.php.net/manual/en/function.date.php#54072 - */ -function date_iso8601($int_date) { - //$int_date: current date in UNIX timestamp - $date_mod = date('Y-m-d\TH:i:s', $int_date); - $pre_timezone = date('O', $int_date); - $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2); - $date_mod .= $time_zone; - return $date_mod; -} - /** * Just send a 1x1 pixel blank gif to the browser * -- cgit v1.2.3 From 2897eb23759202676f5447a72d7fe5eb68321ce3 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 26 Jun 2010 13:33:46 +0200 Subject: Transformed the sitemapper into a class This makes it possible to autoload the sitemapper when needed. --- inc/Sitemapper.php | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++ inc/actions.php | 3 +- inc/load.php | 1 + inc/sitemap.php | 101 --------------------------------------------------- lib/exe/indexer.php | 3 +- 5 files changed, 106 insertions(+), 105 deletions(-) create mode 100644 inc/Sitemapper.php delete mode 100644 inc/sitemap.php diff --git a/inc/Sitemapper.php b/inc/Sitemapper.php new file mode 100644 index 000000000..68f4beddb --- /dev/null +++ b/inc/Sitemapper.php @@ -0,0 +1,103 @@ + + */ + +if(!defined('DOKU_INC')) die('meh.'); + +class Sitemapper { + /** + * Builds a Google Sitemap of all public pages known to the indexer + * + * The map is placed in the cache directory named sitemap.xml.gz - This + * file needs to be writable! + * + * @author Andreas Gohr + * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html + */ + public function generate(){ + global $conf; + dbglog('sitemapGenerate(): started'); + if(!$conf['sitemap']) return false; + + $sitemap = Sitemapper::getFilePath(); + dbglog("runSitemapper(): using $sitemap"); + + if(@file_exists($sitemap)){ + if(!is_writable($sitemap)) return false; + }else{ + if(!is_writable(dirname($sitemap))) return false; + } + + if(@filesize($sitemap) && + @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ + dbglog('runSitemapper(): Sitemap up to date'); + return false; + } + + $pages = idx_getIndex('page', ''); + dbglog('runSitemapper(): creating sitemap using '.count($pages).' pages'); + + // build the sitemap + ob_start(); + print ''.NL; + print ''.NL; + foreach($pages as $id){ + $id = trim($id); + $file = wikiFN($id); + + //skip hidden, non existing and restricted files + if(isHiddenPage($id)) continue; + $date = @filemtime($file); + if(!$date) continue; + if(auth_aclcheck($id,'','') < AUTH_READ) continue; + + print ' '.NL; + print ' '.wl($id,'',true).''.NL; + print ' '.date_iso8601($date).''.NL; + print ' '.NL; + } + print ''.NL; + $data = ob_get_contents(); + ob_end_clean(); + + //save the new sitemap + return io_saveFile($sitemap,$data); + } + + public function getFilePath() { + global $conf; + + $sitemap = $conf['cachedir'].'/sitemap.xml'; + if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ + $sitemap .= '.gz'; + } + + return $sitemap; + } + + public function pingSearchEngines() { + //ping search engines... + $http = new DokuHTTPClient(); + $http->timeout = 8; + + $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&')); + $ping_urls = array( + 'google' => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url, + 'yahoo' => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='.$encoded_sitemap_url, + 'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url, + ); + + foreach ($ping_urls as $name => $url) { + dbglog("sitemapPingSearchEngines(): pinging $name"); + $resp = $http->get($url); + if($http->error) dbglog("runSitemapper(): $http->error"); + dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); + } + + return true; + } +} diff --git a/inc/actions.php b/inc/actions.php index 2d70ac8ed..12c4c595f 100644 --- a/inc/actions.php +++ b/inc/actions.php @@ -615,8 +615,7 @@ function act_sitemap($act) { // Check if sitemap file exists, otherwise create it if (!is_readable($sitemap)) { - require_once DOKU_INC.'inc/sitemap.php'; - sitemapGenerate(); + Sitemapper::generate(); } if (is_readable($sitemap)) { diff --git a/inc/load.php b/inc/load.php index 2f5be6d63..478ee7c76 100644 --- a/inc/load.php +++ b/inc/load.php @@ -74,6 +74,7 @@ function load_autoload($name){ 'DokuWikiFeedCreator' => DOKU_INC.'inc/feedcreator.class.php', 'Doku_Parser_Mode' => DOKU_INC.'inc/parser/parser.php', 'SafeFN' => DOKU_INC.'inc/SafeFN.class.php', + 'Sitemapper' => DOKU_INC.'inc/Sitemapper.php', 'DokuWiki_Action_Plugin' => DOKU_PLUGIN.'action.php', 'DokuWiki_Admin_Plugin' => DOKU_PLUGIN.'admin.php', diff --git a/inc/sitemap.php b/inc/sitemap.php deleted file mode 100644 index bbed7d269..000000000 --- a/inc/sitemap.php +++ /dev/null @@ -1,101 +0,0 @@ - - */ - -if(!defined('DOKU_INC')) die('meh.'); - -/** - * Builds a Google Sitemap of all public pages known to the indexer - * - * The map is placed in the cache directory named sitemap.xml.gz - This - * file needs to be writable! - * - * @author Andreas Gohr - * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html - */ -function sitemapGenerate(){ - global $conf; - dbglog('sitemapGenerate(): started'); - if(!$conf['sitemap']) return false; - - $sitemap = sitemapGetFilePath(); - dbglog("runSitemapper(): using $sitemap"); - - if(@file_exists($sitemap)){ - if(!is_writable($sitemap)) return false; - }else{ - if(!is_writable(dirname($sitemap))) return false; - } - - if(@filesize($sitemap) && - @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ - dbglog('runSitemapper(): Sitemap up to date'); - return false; - } - - $pages = idx_getIndex('page', ''); - dbglog('runSitemapper(): creating sitemap using '.count($pages).' pages'); - - // build the sitemap - ob_start(); - print ''.NL; - print ''.NL; - foreach($pages as $id){ - $id = trim($id); - $file = wikiFN($id); - - //skip hidden, non existing and restricted files - if(isHiddenPage($id)) continue; - $date = @filemtime($file); - if(!$date) continue; - if(auth_aclcheck($id,'','') < AUTH_READ) continue; - - print ' '.NL; - print ' '.wl($id,'',true).''.NL; - print ' '.date_iso8601($date).''.NL; - print ' '.NL; - } - print ''.NL; - $data = ob_get_contents(); - ob_end_clean(); - - //save the new sitemap - return io_saveFile($sitemap,$data); -} - -function sitemapGetFilePath() { - global $conf; - - $sitemap = $conf['cachedir'].'/sitemap.xml'; - if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ - $sitemap .= '.gz'; - } - - return $sitemap; -} - -function sitemapPingSearchEngines() { - //ping search engines... - $http = new DokuHTTPClient(); - $http->timeout = 8; - - $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&')); - $ping_urls = array( - 'google' => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url, - 'yahoo' => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='.$encoded_sitemap_url, - 'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url, - ); - - foreach ($ping_urls as $name => $url) { - dbglog("sitemapPingSearchEngines(): pinging $name"); - $resp = $http->get($url); - if($http->error) dbglog("runSitemapper(): $http->error"); - dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); - } - - return true; -} diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php index 63ad5931f..61cf83acc 100644 --- a/lib/exe/indexer.php +++ b/lib/exe/indexer.php @@ -233,8 +233,7 @@ function metaUpdate(){ */ function runSitemapper(){ print "runSitemapper(): started".NL; - require_once DOKU_INC.'inc/sitemap.php'; - $result = sitemapGenerate() && sitemapPingSearchEngines(); + $result = Sitemapper::generate() && Sitemapper::pingSearchEngines(); print 'runSitemapper(): finished'.NL; return $result; } -- cgit v1.2.3 From 2b54e1e1cc3c24ef164b726b19467ec5536249f5 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sun, 27 Jun 2010 15:09:41 +0200 Subject: Restructured the sitemapper --- inc/Sitemapper.php | 96 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 26 deletions(-) diff --git a/inc/Sitemapper.php b/inc/Sitemapper.php index 68f4beddb..03f4d7bc4 100644 --- a/inc/Sitemapper.php +++ b/inc/Sitemapper.php @@ -33,41 +33,48 @@ class Sitemapper { } if(@filesize($sitemap) && - @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ - dbglog('runSitemapper(): Sitemap up to date'); - return false; - } + @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ + dbglog('runSitemapper(): Sitemap up to date'); + return false; + } $pages = idx_getIndex('page', ''); dbglog('runSitemapper(): creating sitemap using '.count($pages).' pages'); + $items = array(); - // build the sitemap - ob_start(); - print ''.NL; - print ''.NL; + // build the sitemap items foreach($pages as $id){ - $id = trim($id); - $file = wikiFN($id); - //skip hidden, non existing and restricted files if(isHiddenPage($id)) continue; - $date = @filemtime($file); - if(!$date) continue; if(auth_aclcheck($id,'','') < AUTH_READ) continue; + $items[] = SitemapItem::createFromID($id); + } + + $eventData = array('items' => &$items, 'sitemap' => &$sitemap); + $event = new Doku_Event('SITEMAP_GENERATE', $eventData); + if ($event->advise_before(true)) { + //save the new sitemap + $result = io_saveFile($sitemap, Sitemapper::getXML($items)); + } + $event->advise_after(); - print ' '.NL; - print ' '.wl($id,'',true).''.NL; - print ' '.date_iso8601($date).''.NL; - print ' '.NL; + return $result; + } + + private function getXML($items) { + ob_start(); + print ''.NL; + print ''.NL; + foreach ($items as $item) { + print $item->toXML(); } print ''.NL; - $data = ob_get_contents(); + $result = ob_get_contents(); ob_end_clean(); - - //save the new sitemap - return io_saveFile($sitemap,$data); + return $result; } + public function getFilePath() { global $conf; @@ -91,13 +98,50 @@ class Sitemapper { 'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url, ); - foreach ($ping_urls as $name => $url) { - dbglog("sitemapPingSearchEngines(): pinging $name"); - $resp = $http->get($url); - if($http->error) dbglog("runSitemapper(): $http->error"); - dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); + $event = new Doku_Event('SITEMAP_PING', $ping_urls); + if ($event->advise_before(true)) { + foreach ($ping_urls as $name => $url) { + dbglog("sitemapPingSearchEngines(): pinging $name"); + $resp = $http->get($url); + if($http->error) dbglog("runSitemapper(): $http->error"); + dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); + } } + $event->advise_after(); return true; } } + +class SitemapItem { + public $url; + public $lastmod; + public $changefreq; + public $priority; + + public function __construct($url, $lastmod, $changefreq = null, $priority = null) { + $this->url = $url; + $this->lastmod = $lastmod; + $this->changefreq = $changefreq; + $this->priority = $priority; + } + + public static function createFromID($id, $changefreq = null, $priority = null) { + $id = trim($id); + $date = @filemtime(wikiFN($id)); + if(!$date) return NULL; + return new SitemapItem(wl($id, '', true), $date, $changefreq, $priority); + } + + public function toXML() { + $result = ' '.NL; + $result .= ' '.hsc($this->url).''.NL; + $result .= ' '.date_iso8601($this->lastmod).''.NL; + if ($this->changefreq !== NULL) + $result .= ' '.hsc($this->changefreq).''.NL; + if ($this->priority !== NULL) + $result .= ' '.hsc($this->priority).''.NL; + $result .= ' '.NL; + return $result; + } +} -- cgit v1.2.3 From 6c062f5e5826443084fc996d18a7001b28624f78 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Sat, 18 Sep 2010 17:23:50 +0200 Subject: Sitemapper code improved and documentation added Removed some calls to dbglog, improved the code performance and added documentation for all functions and classes of the Sitemapper. --- inc/Sitemapper.php | 94 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 74 insertions(+), 20 deletions(-) diff --git a/inc/Sitemapper.php b/inc/Sitemapper.php index 03f4d7bc4..52c71c545 100644 --- a/inc/Sitemapper.php +++ b/inc/Sitemapper.php @@ -8,6 +8,11 @@ if(!defined('DOKU_INC')) die('meh.'); +/** + * A class for building sitemaps and pinging search engines with the sitemap URL. + * + * @author Michael Hamann + */ class Sitemapper { /** * Builds a Google Sitemap of all public pages known to the indexer @@ -15,16 +20,16 @@ class Sitemapper { * The map is placed in the cache directory named sitemap.xml.gz - This * file needs to be writable! * + * @author Michael Hamann * @author Andreas Gohr * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html + * @link http://www.sitemaps.org/ */ public function generate(){ global $conf; - dbglog('sitemapGenerate(): started'); - if(!$conf['sitemap']) return false; + if($conf['sitemap'] < 1 || !is_numeric($conf['sitemap'])) return false; $sitemap = Sitemapper::getFilePath(); - dbglog("runSitemapper(): using $sitemap"); if(@file_exists($sitemap)){ if(!is_writable($sitemap)) return false; @@ -33,13 +38,15 @@ class Sitemapper { } if(@filesize($sitemap) && - @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ - dbglog('runSitemapper(): Sitemap up to date'); + @filemtime($sitemap) > (time()-($conf['sitemap']*86400))){ // 60*60*24=86400 + dbglog('Sitemapper::generate(): Sitemap up to date'); // FIXME: only in debug mode return false; } + dbglog("Sitemapper::generate(): using $sitemap"); // FIXME: Only in debug mode + $pages = idx_getIndex('page', ''); - dbglog('runSitemapper(): creating sitemap using '.count($pages).' pages'); + dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages'); $items = array(); // build the sitemap items @@ -61,31 +68,49 @@ class Sitemapper { return $result; } + /** + * Builds the sitemap XML string from the given array auf SitemapItems. + * + * @param $items array The SitemapItems that shall be included in the sitemap. + * @return string The sitemap XML. + * @author Michael Hamann + */ private function getXML($items) { ob_start(); - print ''.NL; - print ''.NL; + echo ''.NL; + echo ''.NL; foreach ($items as $item) { - print $item->toXML(); + echo $item->toXML(); } - print ''.NL; + echo ''.NL; $result = ob_get_contents(); ob_end_clean(); return $result; } - + /** + * Helper function for getting the path to the sitemap file. + * + * @return The path to the sitemap file. + * @author Michael Hamann + */ public function getFilePath() { global $conf; $sitemap = $conf['cachedir'].'/sitemap.xml'; - if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ + if($conf['compression'] === 'bz2' || $conf['compression'] === 'gz'){ $sitemap .= '.gz'; } return $sitemap; } + /** + * Pings search engines with the sitemap url. Plugins can add or remove + * urls to ping using the SITEMAP_PING event. + * + * @author Michael Hamann + */ public function pingSearchEngines() { //ping search engines... $http = new DokuHTTPClient(); @@ -98,13 +123,16 @@ class Sitemapper { 'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url, ); - $event = new Doku_Event('SITEMAP_PING', $ping_urls); + $data = array('ping_urls' => $ping_urls, + 'encoded_sitemap_url' => $encoded_sitemap_url + ); + $event = new Doku_Event('SITEMAP_PING', $data); if ($event->advise_before(true)) { - foreach ($ping_urls as $name => $url) { - dbglog("sitemapPingSearchEngines(): pinging $name"); + foreach ($data['ping_urls'] as $name => $url) { + dbglog("Sitemapper::PingSearchEngines(): pinging $name"); $resp = $http->get($url); - if($http->error) dbglog("runSitemapper(): $http->error"); - dbglog('runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); + if($http->error) dbglog("Sitemapper:pingSearchengines(): $http->error"); + dbglog('Sitemapper:pingSearchengines(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp))); } } $event->advise_after(); @@ -113,12 +141,25 @@ class Sitemapper { } } +/** + * An item of a sitemap. + * + * @author Michael Hamann + */ class SitemapItem { public $url; public $lastmod; public $changefreq; public $priority; + /** + * Create a new item. + * + * @param $url string The url of the item + * @param $lastmod int Timestamp of the last modification + * @param $changefreq string How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never. + * @param $priority float|string The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0. + */ public function __construct($url, $lastmod, $changefreq = null, $priority = null) { $this->url = $url; $this->lastmod = $lastmod; @@ -126,6 +167,14 @@ class SitemapItem { $this->priority = $priority; } + /** + * Helper function for creating an item for a wikipage id. + * + * @param $id string A wikipage id. + * @param $changefreq string How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never. + * @param $priority float|string The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0. + * @return The sitemap item. + */ public static function createFromID($id, $changefreq = null, $priority = null) { $id = trim($id); $date = @filemtime(wikiFN($id)); @@ -133,10 +182,15 @@ class SitemapItem { return new SitemapItem(wl($id, '', true), $date, $changefreq, $priority); } + /** + * Get the XML representation of the sitemap item. + * + * @return The XML representation. + */ public function toXML() { - $result = ' '.NL; - $result .= ' '.hsc($this->url).''.NL; - $result .= ' '.date_iso8601($this->lastmod).''.NL; + $result = ' '.NL + .' '.hsc($this->url).''.NL + .' '.date_iso8601($this->lastmod).''.NL; if ($this->changefreq !== NULL) $result .= ' '.hsc($this->changefreq).''.NL; if ($this->priority !== NULL) -- cgit v1.2.3 From eae17177de8f3f3580af5ea66d126aee0f23227f Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Wed, 22 Sep 2010 17:52:13 +0200 Subject: Action handler for sitemaps improved The action handler for the sitemap now makes use of the sitemapper methods for determining the filename and uses http conditional requests. --- inc/actions.php | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/inc/actions.php b/inc/actions.php index 12c4c595f..78666ec98 100644 --- a/inc/actions.php +++ b/inc/actions.php @@ -57,8 +57,9 @@ function act_dispatch(){ $ACT = act_permcheck($ACT); //sitemap - if ($ACT == 'sitemap') + if ($ACT == 'sitemap'){ $ACT = act_sitemap($ACT); + } //register $nil = array(); @@ -599,17 +600,16 @@ function act_export($act){ function act_sitemap($act) { global $conf; - if (!$conf['sitemap']) { + if ($conf['sitemap'] < 1 || !is_numeric($conf['sitemap'])) { header("HTTP/1.0 404 Not Found"); print "Sitemap generation is disabled."; exit; } - $sitemap = $conf['cachedir'].'/sitemap.xml'; - if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){ + $sitemap = Sitemapper::getFilePath(); + if(strrchr($sitemap, '.') === '.gz'){ $mime = 'application/x-gzip'; - $sitemap .= '.gz'; - } else { + }else{ $mime = 'application/xml; charset=utf-8'; } @@ -622,19 +622,18 @@ function act_sitemap($act) { // Send headers header('Content-Type: '.$mime); + http_conditionalRequest(filemtime($sitemap)); + // Send file //use x-sendfile header to pass the delivery to compatible webservers if (http_sendfile($sitemap)) exit; - $fp = @fopen($sitemap,"rb"); - if($fp){ - http_rangeRequest($fp,filesize($sitemap),$mime); - exit; - } + readfile($sitemap); + exit; } header("HTTP/1.0 500 Internal Server Error"); - print "Could not read $sitemap - bad permissions?"; + print "Could not read the sitemap file - bad permissions?"; exit; } -- cgit v1.2.3