summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Hamann <michael@content-space.de>2010-04-07 11:31:50 +0200
committerMichael Hamann <michael@content-space.de>2010-09-22 17:55:07 +0200
commitc4f79b71351dd0d96f19f7c5629888d85a814c72 (patch)
tree404b45b1606d3b9153faa3a16c8c9e56712e91da
parent42de51b18d302d07e109c24dfb0ae237e6d38643 (diff)
downloadrpg-c4f79b71351dd0d96f19f7c5629888d85a814c72.tar.gz
rpg-c4f79b71351dd0d96f19f7c5629888d85a814c72.tar.bz2
Sitemap rewrite
-rw-r--r--inc/actions.php57
-rw-r--r--inc/common.php15
-rw-r--r--inc/sitemap.php101
-rw-r--r--lib/exe/indexer.php98
4 files changed, 174 insertions, 97 deletions
diff --git a/inc/actions.php b/inc/actions.php
index 3e0cb1207..2d70ac8ed 100644
--- a/inc/actions.php
+++ b/inc/actions.php
@@ -56,6 +56,10 @@ function act_dispatch(){
//check permissions
$ACT = act_permcheck($ACT);
+ //sitemap
+ if ($ACT == 'sitemap')
+ $ACT = act_sitemap($ACT);
+
//register
$nil = array();
if($ACT == 'register' && $_POST['save'] && register()){
@@ -205,7 +209,7 @@ function act_clean($act){
'preview','search','show','check','index','revisions',
'diff','recent','backlink','admin','subscribe','revert',
'unsubscribe','profile','resendpwd','recover',
- 'draftdel','subscribens','unsubscribens',)) && substr($act,0,7) != 'export_' ) {
+ 'draftdel','subscribens','unsubscribens','sitemap')) && substr($act,0,7) != 'export_' ) {
msg('Command unknown: '.htmlspecialchars($act),-1);
return 'show';
}
@@ -233,7 +237,8 @@ function act_permcheck($act){
}else{
$permneed = AUTH_CREATE;
}
- }elseif(in_array($act,array('login','search','recent','profile','index'))){
+ }elseif(in_array($act,array('login','search','recent','profile','index', 'sitemap'))){
+ }elseif(in_array($act,array('login','search','recent','profile','sitemap'))){
$permneed = AUTH_NONE;
}elseif($act == 'revert'){
$permneed = AUTH_ADMIN;
@@ -587,6 +592,54 @@ function act_export($act){
}
/**
+ * Handle sitemap delivery
+ *
+ * Sends the cached sitemap file to the client and ends the request. When no
+ * readable sitemap file exists yet, one is generated first. Answers with a
+ * 404 status when sitemap generation is disabled and with a 500 status when
+ * the file can not be read. Every code path ends in exit, so this function
+ * never returns to the caller.
+ *
+ * @author Michael Hamann <michael@content-space.de>
+ * @param string $act the current action name; not evaluated here
+ */
+function act_sitemap($act) {
+    global $conf;
+
+    if (!$conf['sitemap']) {
+        header("HTTP/1.0 404 Not Found");
+        print "Sitemap generation is disabled.";
+        exit;
+    }
+
+    // loaded up front so the file location comes from the same helper the
+    // generator uses instead of being computed a second time here
+    require_once DOKU_INC.'inc/sitemap.php';
+
+    $sitemap = sitemapGetFilePath();
+    // the helper appends '.gz' whenever compression is enabled; such a file
+    // is served as gzip, everything else as plain XML
+    if (substr($sitemap, -3) == '.gz') {
+        $mime = 'application/x-gzip';
+    } else {
+        $mime = 'application/xml; charset=utf-8';
+    }
+
+    // Check if sitemap file exists, otherwise create it
+    if (!is_readable($sitemap)) {
+        sitemapGenerate();
+    }
+
+    if (is_readable($sitemap)) {
+        // Send headers
+        header('Content-Type: '.$mime);
+
+        // Send file
+        //use x-sendfile header to pass the delivery to compatible webservers
+        if (http_sendfile($sitemap)) exit;
+
+        $fp = @fopen($sitemap,"rb");
+        if($fp){
+            http_rangeRequest($fp,filesize($sitemap),$mime);
+            exit;
+        }
+    }
+
+    header("HTTP/1.0 500 Internal Server Error");
+    // a sitemap Content-Type may already be set above; the error text below
+    // is plain text and must not be announced as (gzipped) XML
+    header('Content-Type: text/plain; charset=utf-8');
+    // deliberately does not include the server side path of the cache file
+    print "Could not read the sitemap file - bad permissions?";
+    exit;
+}
+
+/**
* Handle page 'subscribe'
*
* Throws exception on error.
diff --git a/inc/common.php b/inc/common.php
index bf5987c28..0816d9fbb 100644
--- a/inc/common.php
+++ b/inc/common.php
@@ -1267,6 +1267,21 @@ function dformat($dt=null,$format=''){
}
/**
+ * Formats a timestamp as ISO 8601 date
+ *
+ * The result looks like 2004-02-12T15:19:21+00:00 and is expressed in the
+ * timezone PHP is currently configured to use.
+ *
+ * @author <ungu at terong dot com>
+ * @link http://www.php.net/manual/en/function.date.php#54072
+ * @param int $int_date UNIX timestamp to format
+ * @return string the date, the time and the UTC offset written as +hh:mm
+ */
+function date_iso8601($int_date) {
+    // 'c' is date()'s built-in ISO 8601 format; it yields the same string
+    // as assembling 'Y-m-d\TH:i:s' and a colon separated 'O' offset by hand
+    return date('c', $int_date);
+}
+
+/**
* return an obfuscated email address in line with $conf['mailguard'] setting
*
* @author Harry Fuecks <hfuecks@gmail.com>
diff --git a/inc/sitemap.php b/inc/sitemap.php
new file mode 100644
index 000000000..bbed7d269
--- /dev/null
+++ b/inc/sitemap.php
@@ -0,0 +1,101 @@
+<?php
+/**
+ * Sitemap handling functions
+ *
+ * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
+ * @author Michael Hamann <michael@content-space.de>
+ */
+
+if(!defined('DOKU_INC')) die('meh.');
+
+/**
+ * Builds a Google Sitemap of all public pages known to the indexer
+ *
+ * The map is written to the location returned by sitemapGetFilePath(), which
+ * is sitemap.xml or sitemap.xml.gz inside the cache directory - this file
+ * (or, when it does not exist yet, its directory) needs to be writable!
+ *
+ * Nothing is written when sitemaps are disabled, when the target is not
+ * writable, or when a non-empty sitemap exists that is younger than
+ * $conf['sitemap'] days.
+ *
+ * @author Andreas Gohr
+ * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
+ * @return mixed false when no sitemap was written, otherwise whatever
+ *               io_saveFile() returns
+ */
+function sitemapGenerate(){
+    global $conf;
+    dbglog('sitemapGenerate(): started');
+    if(!$conf['sitemap']) return false;
+
+    $sitemap = sitemapGetFilePath();
+    dbglog("sitemapGenerate(): using $sitemap");
+
+    // an existing file must be writable itself, a new one needs a writable directory
+    if(@file_exists($sitemap)){
+        if(!is_writable($sitemap)) return false;
+    }else{
+        if(!is_writable(dirname($sitemap))) return false;
+    }
+
+    // $conf['sitemap'] is multiplied up to seconds and used as the maximum age in days
+    if(@filesize($sitemap) &&
+       @filemtime($sitemap) > (time()-($conf['sitemap']*60*60*24))){
+        dbglog('sitemapGenerate(): Sitemap up to date');
+        return false;
+    }
+
+    $pages = idx_getIndex('page', '');
+    dbglog('sitemapGenerate(): creating sitemap using '.count($pages).' pages');
+
+    // build the sitemap as a plain string, no output buffering needed
+    $data  = '<?xml version="1.0" encoding="UTF-8"?>'.NL;
+    $data .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
+    foreach($pages as $id){
+        $id = trim($id);
+        $file = wikiFN($id);
+
+        //skip hidden, non existing and restricted files
+        if(isHiddenPage($id)) continue;
+        $date = @filemtime($file);
+        if(!$date) continue;
+        // checked with an empty user and empty groups
+        if(auth_aclcheck($id,'','') < AUTH_READ) continue;
+
+        $data .= ' <url>'.NL;
+        $data .= ' <loc>'.wl($id,'',true).'</loc>'.NL;
+        $data .= ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
+        $data .= ' </url>'.NL;
+    }
+    $data .= '</urlset>'.NL;
+
+    //save the new sitemap
+    return io_saveFile($sitemap,$data);
+}
+
+/**
+ * Returns the location of the sitemap file
+ *
+ * The file is named sitemap.xml and lives in $conf['cachedir']; a '.gz'
+ * suffix is added when $conf['compression'] is set to 'bz2' or 'gz'.
+ *
+ * @return string path of the sitemap file
+ */
+function sitemapGetFilePath() {
+    global $conf;
+
+    // both compression settings map to the same '.gz' suffix
+    $compressed = in_array($conf['compression'], array('bz2', 'gz'));
+
+    return $conf['cachedir'].'/sitemap.xml'.($compressed ? '.gz' : '');
+}
+
+/**
+ * Pings search engines with the URL of the sitemap
+ *
+ * The absolute URL of the wiki's do=sitemap action is submitted to the ping
+ * services of Google, Yahoo and Microsoft. Errors reported by the HTTP
+ * client and the tag-stripped response of every request are written to the
+ * debug log only; they do not influence the return value.
+ *
+ * @return bool always true
+ */
+function sitemapPingSearchEngines() {
+    //ping search engines...
+    $http = new DokuHTTPClient();
+    $http->timeout = 8;
+
+    // '&' instead of the default separator, the URL is urlencoded as a whole
+    $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&'));
+    $ping_urls = array(
+        'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url,
+        'yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='.$encoded_sitemap_url,
+        'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url,
+    );
+
+    foreach ($ping_urls as $name => $url) {
+        dbglog("sitemapPingSearchEngines(): pinging $name");
+        $resp = $http->get($url);
+        if($http->error) dbglog('sitemapPingSearchEngines(): '.$http->error);
+        // the response is logged as a single line without markup
+        dbglog('sitemapPingSearchEngines(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)));
+    }
+
+    return true;
+}
diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php
index f8e2f7981..63ad5931f 100644
--- a/lib/exe/indexer.php
+++ b/lib/exe/indexer.php
@@ -232,88 +232,11 @@ function metaUpdate(){
* @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
*/
function runSitemapper(){
- global $conf;
print "runSitemapper(): started".NL;
- if(!$conf['sitemap']) return false;
-
- if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
- $sitemap = 'sitemap.xml.gz';
- }else{
- $sitemap = 'sitemap.xml';
- }
- print "runSitemapper(): using $sitemap".NL;
-
- if(@file_exists(DOKU_INC.$sitemap)){
- if(!is_writable(DOKU_INC.$sitemap)) return false;
- }else{
- if(!is_writable(DOKU_INC)) return false;
- }
-
- if(@filesize(DOKU_INC.$sitemap) &&
- @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
- print 'runSitemapper(): Sitemap up to date'.NL;
- return false;
- }
-
- $pages = idx_getIndex('page', '');
- print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;
-
- // build the sitemap
- ob_start();
- print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
- print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
- foreach($pages as $id){
- $id = trim($id);
- $file = wikiFN($id);
-
- //skip hidden, non existing and restricted files
- if(isHiddenPage($id)) continue;
- $date = @filemtime($file);
- if(!$date) continue;
- if(auth_aclcheck($id,'','') < AUTH_READ) continue;
-
- print ' <url>'.NL;
- print ' <loc>'.wl($id,'',true).'</loc>'.NL;
- print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
- print ' </url>'.NL;
- }
- print '</urlset>'.NL;
- $data = ob_get_contents();
- ob_end_clean();
-
- //save the new sitemap
- io_saveFile(DOKU_INC.$sitemap,$data);
-
- //ping search engines...
- $http = new DokuHTTPClient();
- $http->timeout = 8;
-
- //ping google
- print 'runSitemapper(): pinging google'.NL;
- $url = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
- $url .= urlencode(DOKU_URL.$sitemap);
- $resp = $http->get($url);
- if($http->error) print 'runSitemapper(): '.$http->error.NL;
- print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
-
- //ping yahoo
- print 'runSitemapper(): pinging yahoo'.NL;
- $url = 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=';
- $url .= urlencode(DOKU_URL.$sitemap);
- $resp = $http->get($url);
- if($http->error) print 'runSitemapper(): '.$http->error.NL;
- print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
-
- //ping microsoft
- print 'runSitemapper(): pinging microsoft'.NL;
- $url = 'http://www.bing.com/webmaster/ping.aspx?siteMap=';
- $url .= urlencode(DOKU_URL.$sitemap);
- $resp = $http->get($url);
- if($http->error) print 'runSitemapper(): '.$http->error.NL;
- print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
-
+ require_once DOKU_INC.'inc/sitemap.php';
+ // && short-circuits: the search engines are only pinged when
+ // sitemapGenerate() returned a true value
+ $result = sitemapGenerate() && sitemapPingSearchEngines();
print 'runSitemapper(): finished'.NL;
- return true;
+ return $result;
}
/**
@@ -409,21 +332,6 @@ function sendDigest() {
}
/**
- * Formats a timestamp as ISO 8601 date
- *
- * @author <ungu at terong dot com>
- * @link http://www.php.net/manual/en/function.date.php#54072
- */
-function date_iso8601($int_date) {
- //$int_date: current date in UNIX timestamp
- $date_mod = date('Y-m-d\TH:i:s', $int_date);
- $pre_timezone = date('O', $int_date);
- $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
- $date_mod .= $time_zone;
- return $date_mod;
-}
-
-/**
* Just send a 1x1 pixel blank gif to the browser
*
* @author Andreas Gohr <andi@splitbrain.org>