From 8b9da5a7173ccde168f612b798f2af2a82d125fc Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 27 Nov 2005 00:47:09 +0100 Subject: Added Google sitemap support #371 This patch adds the automatic creation of Google sitemaps. The map is created in the DokuWiki root dir and named sitemap.xml.gz if gzip compression is available - if not, the gz extension is omitted. How often the map is recreated is defined through the $conf['sitemap'] option, which takes the interval in days. darcs-hash:20051126234709-7ad00-6ff4b0e79670cdfa39e615ec9dc40146ffcc9dd4.gz --- lib/exe/indexer.php | 168 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 117 insertions(+), 51 deletions(-) (limited to 'lib/exe/indexer.php') diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php index 80dbb77f5..2eea32f02 100644 --- a/lib/exe/indexer.php +++ b/lib/exe/indexer.php @@ -5,77 +5,143 @@ * @license GPL 2 (http://www.gnu.org/licenses/gpl.html) * @author Andreas Gohr */ - -/** - * Just send a 1x1 pixel blank gif to the browser and exit - - */ -function sendGIF(){ - $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7'); - header('Content-Type: image/gif'); - header('Content-Length: '.strlen($img)); - header('Connection: Close'); - print $img; - // Browser should drop connection after this - // Thinks it's got the whole image -} - -// Make sure image is sent to the browser immediately -ob_implicit_flush(TRUE); +if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/'); +require_once(DOKU_INC.'inc/init.php'); +require_once(DOKU_INC.'inc/auth.php'); +session_write_close(); //close session // keep running after browser closes connection @ignore_user_abort(true); +// send gif sendGIF(); -// Switch off implicit flush again - we don't want to send any more output -ob_implicit_flush(FALSE); - // Catch any possible output (e.g. errors) // - probably not needed but better safe... 
ob_start(); -// Called to exit - we don't want any output going anywhere -function indexer_stop() { - ob_end_clean(); - exit(); -} - // Now start work -if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/'); -require_once(DOKU_INC.'inc/init.php'); -//close session -session_write_close(); +require_once(DOKU_INC.'inc/utf8.php'); +require_once(DOKU_INC.'inc/auth.php'); -require_once(DOKU_INC.'inc/indexer.php'); +// run one of the jobs - the sitemapper is only tried when the indexer returned false +runIndexer() or runSitemapper(); -$ID = cleanID($_REQUEST['id']); -if(!$ID) indexer_stop(); +ob_end_clean(); +exit; -// check if indexing needed -$last = @filemtime(metaFN($ID,'.indexed')); -if($last > @filemtime(wikiFN($ID))) indexer_stop(); +// -------------------------------------------------------------------- -// try to aquire a lock -$lock = $conf['lockdir'].'/_indexer.lock'; -while(!@mkdir($lock,0777)){ - if(time()-@filemtime($lock) > 60*5){ - // looks like a stale lock - remove it - @rmdir($lock); - }else{ - indexer_stop(); +/** + * Runs the indexer for the current page - returns true when the page was indexed, false when nothing was done + * + * @author Andreas Gohr + */ +function runIndexer(){ + global $conf; + + $ID = cleanID($_REQUEST['id']); + if(!$ID) return false; + + // check if indexing needed - nothing to do when the .indexed marker is newer than the page file + $last = @filemtime(metaFN($ID,'.indexed')); + if($last > @filemtime(wikiFN($ID))) return false; + + // try to acquire a lock - give up unless the existing lock dir is older than 5 minutes + $lock = $conf['lockdir'].'/_indexer.lock'; + while(!@mkdir($lock,0777)){ + if(time()-@filemtime($lock) > 60*5){ + // looks like a stale lock - remove it + @rmdir($lock); + }else{ + return false; + } } + + require_once(DOKU_INC.'inc/indexer.php'); + + // do the work + idx_addPage($ID); + + // we're finished - save and free lock + io_saveFile(metaFN($ID,'.indexed'),' '); + @rmdir($lock); + return true; } -// do the work -idx_addPage($ID); +/** + * Builds a Google Sitemap of all public pages known to the indexer + * + * The map is placed in the root directory named sitemap.xml.gz (sitemap.xml when gzip is off) - This + * file needs to be writable! 
+ * Returns true when a new map was written, false when nothing was done (disabled, file not writable or map still fresh) + * @author Andreas Gohr + * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html + */ +function runSitemapper(){ + global $conf; + if(!$conf['sitemap']) return false; + if(!defined('NL')) define('NL',"\n"); -// we're finished -io_saveFile(metaFN($ID,'.indexed'),' '); -@rmdir($lock); -indexer_stop(); + if($conf['usegzip']){ + $sitemap = DOKU_INC.'sitemap.xml.gz'; + }else{ + $sitemap = DOKU_INC.'sitemap.xml'; + } -//Setup VIM: ex: et ts=4 enc=utf-8 : + if(!is_writable($sitemap)) return false; + if(@filesize($sitemap) && + @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){ + return false; + } + + ob_start(); + $pages = file($conf['cachedir'].'/page.idx'); + + print '<?xml version="1.0" encoding="UTF-8"?>'.NL; + print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL; + foreach($pages as $id){ + $id = trim($id); + $file = wikiFN($id); + + //skip hidden, non existing and restricted files - only ever continue here: returning would drop the map and leave the output buffer opened above unclosed + if(isHiddenPage($id)) continue; + $date = @filemtime($file); + if(!$date) continue; + if(auth_aclcheck($id,'','') < AUTH_READ) continue; + + print ' <url>'.NL; + print ' <loc>'.wl($id,'',true).'</loc>'.NL; + print ' <lastmod>'.date('Y-m-d\TH:i:s',$date).'</lastmod>'.NL; + print ' </url>'.NL; + } + print '</urlset>'.NL; + + $data = ob_get_contents(); + ob_end_clean(); + + io_saveFile($sitemap,$data); + return true; +} + +/** + * Just send a 1x1 pixel blank gif to the browser + * + * @author Andreas Gohr + * @author Harry Fuecks + */ +function sendGIF(){ + $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7'); + header('Content-Type: image/gif'); + header('Content-Length: '.strlen($img)); + header('Connection: Close'); + print $img; + flush(); + // Browser should drop connection after this + // Thinks it's got the whole image +} + +//Setup VIM: ex: et ts=4 enc=utf-8 : // No trailing PHP closing tag - no output please! // See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php -- cgit v1.2.3