summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorBen Coburn <btcoburn@silicodon.net>2006-08-30 20:27:53 +0200
committerBen Coburn <btcoburn@silicodon.net>2006-08-30 20:27:53 +0200
commit71726d7801bdcbf41dfdc79d244f09a0988529c0 (patch)
tree4923946228e6b3cd26107aa310a877ad5bc948a9 /lib
parent19a3223378923679493484987e6719c16b5f5997 (diff)
downloadrpg-71726d7801bdcbf41dfdc79d244f09a0988529c0.tar.gz
rpg-71726d7801bdcbf41dfdc79d244f09a0988529c0.tar.bz2
scalable changelog redesign
This patch provides a rewritten changelog system that is designed to run efficiently on both small and large wikis. The patch includes a plugin to convert changelogs from the current format. The conversion is non-destructive and happens automatically. For more information on the new changelog format see "http://wiki.splitbrain.org/wiki:changelog". Structure: In short, the changelog is now stored in per-page changelog files, with a recent changes cache. The recent changes cache is kept in "/data/meta/_dokuwiki.changes" and trimmed daily. The per-page changelogs are kept in "/data/meta/<ns>/<page_id>.changes" files. To preserve revision information for revisions stored in the attic, the "*.changes" files are not removed when their page is deleted. This allows the full life-cycle of page creation, deletion, and reversion to be tracked. Format: The changelog line format now uses a general "line type" field in place of the special "minor" change syntax. There is also an extra field that can be used to store arbitrary data associated with special line types. The reverted line type (R) is a good example: there, the extra field holds the revision date used as the source for reverting the page. See the wiki for the complete syntax description. Code Notes: The changelog functions have been rewritten to load the whole file only if it is small. For larger files, the functions load only the relevant chunk(s). Parsed changelog lines are cached in memory to speed up future function calls. getRevisionInfo: A binary search is used to locate the chunk expected to contain the requested revision. The whole chunk is parsed, and adjacent lines are optimistically cached to speed up consecutive calls. getRevisions: Reads the changelog file backwards (newest first) in chunks until the requested number of lines has been read. Parsed changelog lines are cached for subsequent calls to getRevisionInfo. Because revisions are read from the changelog, they are no longer guaranteed to exist in the attic.
(Note: Even with lines of arbitrary length, getRevisionInfo and getRevisions never split changelog lines while reading. This is done by sliding the "file pointer" forward to the end of a line after each blind seek.) isMinor: Removed. To detect a minor edit, check the type as follows: $parsed_logline['type']==='e' darcs-hash:20060830182753-05dcb-1c5ea17f581197a33732a8d11da223d809c03506.gz
Diffstat (limited to 'lib')
-rw-r--r--lib/exe/indexer.php69
-rw-r--r--lib/plugins/config/lang/en/lang.php1
-rw-r--r--lib/plugins/config/settings/config.metadata.php1
-rw-r--r--lib/plugins/importoldchangelog/action.php177
-rw-r--r--lib/plugins/plugin/admin.php2
5 files changed, 248 insertions, 2 deletions
diff --git a/lib/exe/indexer.php b/lib/exe/indexer.php
index 2728e5665..d65707911 100644
--- a/lib/exe/indexer.php
+++ b/lib/exe/indexer.php
@@ -27,7 +27,7 @@ if(@ignore_user_abort()){
if(!$_REQUEST['debug']) ob_start();
// run one of the jobs
-runIndexer() or metaUpdate() or runSitemapper();
+runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();
if($defer) sendGIF();
if(!$_REQUEST['debug']) ob_end_clean();
@@ -36,6 +36,73 @@ exit;
// --------------------------------------------------------------------
/**
+ * Trims the recent changes cache (or imports the old changelog) as needed.
+ *
+ * Two jobs are handled here, at most one per call:
+ *  1. If an old-format changelog exists and no new changelog (or import/trim
+ *     marker file) is present, the TEMPORARY_CHANGELOG_UPGRADE_EVENT is
+ *     triggered so the importoldchangelog plugin can convert it.
+ *  2. Otherwise, if the changelog's ctime is more than a day old, the recent
+ *     changes cache is cut down to the entries newer than
+ *     $conf['recent_days'] days or the newest $conf['recent'] entries,
+ *     whichever is larger.
+ *
+ * @author Ben Coburn <btcoburn@silicodon.net>
+ * @return bool true if the import was triggered or the trim branch was
+ *              entered, false if nothing was done
+ */
+function runTrimRecentChanges() {
+    global $conf;
+
+    // Import old changelog (if needed)
+    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
+    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
+    if (isset($conf['changelog_old']) &&
+        file_exists($conf['changelog_old']) && !file_exists($conf['changelog']) &&
+        !file_exists($conf['changelog'].'_importing') && !file_exists($conf['changelog'].'_tmp')) {
+        $tmp = array(); // no event data
+        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
+        return true;
+    }
+
+    // Trim the Recent Changes
+    // Trims the recent changes cache to the last $conf['recent_days'] days of
+    // changes or $conf['recent'] items, whichever is larger.
+    // The trimming is only attempted when the file's ctime is over a day old.
+    if (file_exists($conf['changelog']) &&
+        (filectime($conf['changelog'])+86400)<time() &&
+        !file_exists($conf['changelog'].'_tmp')) {
+        io_lock($conf['changelog']);
+        $lines = file($conf['changelog']);
+        if ($lines===false || count($lines)<$conf['recent']) {
+            // unreadable file or nothing to trim
+            io_unlock($conf['changelog']);
+            return true;
+        }
+        // trim changelog
+        io_saveFile($conf['changelog'].'_tmp', ''); // presave tmp as 2nd lock
+        $kept = 0;
+        $trim_time = time() - $conf['recent_days']*86400;
+        $out_lines = array();
+        // check lines from newest (end of file) to oldest (start of file)
+        for ($i = count($lines)-1; $i >= 0; $i--) {
+            $tmp = parseChangelogLine($lines[$i]);
+            if ($tmp===false) { continue; } // skip unparsable lines
+            if ($tmp['date']>$trim_time || $kept<$conf['recent']) {
+                array_push($out_lines, implode("\t", $tmp)."\n");
+                $kept++;
+            } else {
+                // no more lines worth keeping
+                break;
+            }
+        }
+        // The lines were collected newest first; put them back into the
+        // file's oldest-first order so the next reader (and the next trim,
+        // which again scans from the end) sees the newest entries last.
+        $out = implode('', array_reverse($out_lines));
+        io_saveFile($conf['changelog'].'_tmp', $out);
+        unlink($conf['changelog']);
+        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
+            // rename failed so try another way...
+            io_unlock($conf['changelog']);
+            io_saveFile($conf['changelog'], $out);
+            unlink($conf['changelog'].'_tmp');
+        } else {
+            io_unlock($conf['changelog']);
+        }
+        return true;
+    }
+
+    // nothing done
+    return false;
+}
+
+/**
* Runs the indexer for the current page
*
* @author Andreas Gohr <andi@splitbrain.org>
diff --git a/lib/plugins/config/lang/en/lang.php b/lib/plugins/config/lang/en/lang.php
index c2bd5aacf..4c4c713e5 100644
--- a/lib/plugins/config/lang/en/lang.php
+++ b/lib/plugins/config/lang/en/lang.php
@@ -124,6 +124,7 @@ $lang['sitemap'] = 'Generate Google sitemap (days)';
$lang['rss_type'] = 'XML feed type';
$lang['rss_linkto'] = 'XML feed links to';
$lang['rss_update'] = 'XML feed update interval (sec)';
+$lang['recent_days'] = 'How many recent changes to keep (days)';
/* Target options */
$lang['target____wiki'] = 'Target window for internal links';
diff --git a/lib/plugins/config/settings/config.metadata.php b/lib/plugins/config/settings/config.metadata.php
index 0dd9f1de3..b55c0e930 100644
--- a/lib/plugins/config/settings/config.metadata.php
+++ b/lib/plugins/config/settings/config.metadata.php
@@ -162,6 +162,7 @@ $meta['sitemap'] = array('numeric');
$meta['rss_type'] = array('multichoice','_choices' => array('rss','rss1','rss2','atom'));
$meta['rss_linkto'] = array('multichoice','_choices' => array('diff','page','rev','current'));
$meta['rss_update'] = array('numeric');
+$meta['recent_days'] = array('numeric');
$meta['_network'] = array('fieldset');
$meta['proxy____host'] = array('string','_pattern' => '#^[a-z0-9\-\.+]+?#i');
diff --git a/lib/plugins/importoldchangelog/action.php b/lib/plugins/importoldchangelog/action.php
new file mode 100644
index 000000000..400ff6a18
--- /dev/null
+++ b/lib/plugins/importoldchangelog/action.php
@@ -0,0 +1,177 @@
+<?php
+// must be run within Dokuwiki
+if(!defined('DOKU_INC')) die();
+
+if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
+require_once(DOKU_PLUGIN.'action.php');
+
+/**
+ * Action plugin that converts the old single-file changelog into per-page
+ * changelog files plus a recent changes cache.
+ *
+ * The conversion runs when TEMPORARY_CHANGELOG_UPGRADE_EVENT is triggered.
+ */
+class action_plugin_importoldchangelog extends DokuWiki_Action_Plugin {
+
+    /**
+     * Returns the plugin's descriptive metadata.
+     *
+     * @return array author, email, date, name, desc and url entries
+     */
+    function getInfo(){
+        return array(
+            'author' => 'Ben Coburn',
+            'email' => 'btcoburn@silicodon.net',
+            'date' => '2006-08-30',
+            'name' => 'Import Old Changelog',
+            'desc' => 'Imports and converts the single file changelog '.
+                      'from the 2006-03-09b release to the new format. '.
+                      'Also reconstructs missing changelog data from '.
+                      'old revisions kept in the attic.',
+            'url' => 'http://wiki.splitbrain.org/wiki:changelog'
+        );
+    }
+
+    /**
+     * Hooks run_import() onto the changelog upgrade event.
+     *
+     * @param object $controller event controller offering register_hook()
+     */
+    function register(&$controller) {
+        $controller->register_hook('TEMPORARY_CHANGELOG_UPGRADE_EVENT', 'BEFORE', $this, 'run_import');
+    }
+
+    /**
+     * Converts one old-format changelog line and files it under its page id.
+     *
+     * Old line format (tab separated):
+     *   0 => date, 1 => ip, 2 => id, 3 => user, 4 => summary
+     * The first character of the summary is a flag ('*' marks a minor edit)
+     * and is stripped from the stored summary.
+     *
+     * @param string $line one raw line from the old changelog
+     * @param array  $logs collected entries, indexed as $logs[id][date]
+     */
+    function importOldLog($line, &$logs) {
+        global $lang;
+        $oldline = explode("\t", $line);
+        if (count($oldline)>1) {
+            // Short lines are still imported; pad them so the missing
+            // fields read as empty strings instead of undefined offsets.
+            $oldline = array_pad($oldline, 5, '');
+            // trim summary
+            $wasMinor = (substr($oldline[4], 0, 1)==='*');
+            $sum = rtrim((string)substr($oldline[4], 1), "\n");
+            // guess line type
+            $type = 'E';
+            if ($wasMinor) { $type = 'e'; }
+            if ($sum===$lang['created']) { $type = 'C'; }
+            if ($sum===$lang['deleted']) { $type = 'D'; }
+            // build new log line
+            $tmp = array();
+            $tmp['date'] = $oldline[0];
+            $tmp['ip'] = $oldline[1];
+            $tmp['type'] = $type;
+            $tmp['id'] = $oldline[2];
+            $tmp['user'] = $oldline[3];
+            $tmp['sum'] = $sum;
+            $tmp['extra'] = '';
+            // order line by id
+            if (!isset($logs[$tmp['id']])) { $logs[$tmp['id']] = array(); }
+            $logs[$tmp['id']][$tmp['date']] = $tmp;
+        }
+    }
+
+    /**
+     * Walks the attic ($conf['olddir']) and adds a placeholder entry for
+     * every stored revision that is not yet present in $logs.
+     *
+     * @param array $logs collected entries, indexed as $logs[id][date]
+     */
+    function importFromAttic(&$logs) {
+        global $conf, $lang;
+        $base = $conf['olddir'];
+        $stack = array(''); // namespaces still to visit
+        while (count($stack)>0){
+            $context = array_pop($stack); // current namespace
+            $prefix = ($context=='') ? '' : $context.':';
+            $dir = dir($base.'/'.str_replace(':', '/', $context));
+            if (!$dir) { continue; } // missing or unreadable directory
+
+            while (($file = $dir->read()) !== false) {
+                if ($file==='.' || $file==='..') { continue; }
+
+                // Check for directories first so that a namespace whose
+                // name contains dots is descended into, not parsed as a file.
+                if (is_dir($dir->path.'/'.$file)) {
+                    array_push($stack, $prefix.$file);
+                    continue;
+                }
+
+                // <page>.<revision>.txt with an optional compression suffix.
+                // Anchored and greedy on the page part, because page names
+                // may themselves contain dots.
+                $matches = array();
+                if (preg_match('/^(.+)\.(\d+)\.txt(\.[^.]+)?$/', $file, $matches)===1) {
+                    $id = $prefix.$matches[1];
+                    $date = $matches[2];
+
+                    // check if page & revision are already logged
+                    if (!isset($logs[$id])) { $logs[$id] = array(); }
+                    if (!isset($logs[$id][$date])) {
+                        $tmp = array();
+                        $tmp['date'] = $date;
+                        $tmp['ip'] = '127.0.0.1'; // original ip lost
+                        $tmp['type'] = 'E';
+                        $tmp['id'] = $id;
+                        $tmp['user'] = ''; // original user lost
+                        $tmp['sum'] = '('.$lang['restored'].')'; // original summary lost
+                        $tmp['extra'] = '';
+                        $logs[$id][$date] = $tmp;
+                    }
+                }
+            }
+
+            $dir->close();
+        }
+    }
+
+    /**
+     * Writes the changelog file of a single page and collects its recent
+     * lines for the recent changes cache.
+     *
+     * @param string $id        page id
+     * @param array  $changes   the page's entries, indexed by date
+     * @param array  $recent    collected recent lines, indexed by date
+     * @param int    $trim_time entries dated after this are "recent"
+     */
+    function savePerPageChanges($id, &$changes, &$recent, $trim_time) {
+        $out_lines = array();
+        ksort($changes); // ensure correct order of changes from attic
+        foreach ($changes as $tmp) {
+            $line = implode("\t", $tmp)."\n";
+            array_push($out_lines, $line);
+            if ($tmp['date']>$trim_time) {
+                // NOTE: keyed by date only, so two pages changed in the
+                // same second overwrite each other in the recent cache.
+                $recent[$tmp['date']] = $line;
+            }
+        }
+        io_saveFile(metaFN($id, '.changes'), implode('', $out_lines));
+    }
+
+    /**
+     * Grants the script another five minutes of execution time.
+     */
+    function resetTimer() {
+        // Add 5 minutes to the script execution timer...
+        // This should be much more than needed.
+        set_time_limit(5*60);
+        // Note: Has no effect in safe-mode!
+    }
+
+    /**
+     * Event handler: performs the complete changelog import.
+     *
+     * @param object $event the triggering event (unused)
+     * @param mixed  $args  hook parameters (unused)
+     */
+    function run_import(&$event, $args) {
+        global $conf;
+        register_shutdown_function('importoldchangelog_plugin_shutdown');
+        touch($conf['changelog'].'_importing'); // changelog importing lock
+        io_saveFile($conf['changelog'], '');    // pre-create changelog
+        io_lock($conf['changelog']);            // hold onto the lock
+        // load old changelog
+        $this->resetTimer();
+        $log = array();
+        $oldlog = file($conf['changelog_old']);
+        if ($oldlog===false) { $oldlog = array(); } // unreadable: rely on the attic only
+        foreach ($oldlog as $line) {
+            $this->importOldLog($line, $log);
+        }
+        unset($oldlog); // free memory
+        // look in the attic for unlogged revisions
+        $this->resetTimer();
+        $this->importFromAttic($log);
+        // save per-page changelogs
+        $this->resetTimer();
+        $recent = array();
+        $trim_time = time() - $conf['recent_days']*86400;
+        foreach ($log as $id => $page) {
+            $this->savePerPageChanges($id, $page, $recent, $trim_time);
+        }
+        // save recent changes cache
+        $this->resetTimer();
+        ksort($recent); // ensure correct order of recent changes
+        io_unlock($conf['changelog']); // hand off the lock to io_saveFile
+        io_saveFile($conf['changelog'], implode('', $recent));
+        unlink($conf['changelog'].'_importing'); // changelog importing unlock
+    }
+
+}
+
+/**
+ * Shutdown handler for the changelog import.
+ *
+ * Releases the changelog lock and records the outcome of the import with
+ * marker files next to the changelog: "_import_ok" if the "_importing"
+ * marker is gone, "_failed" if it is still present.
+ */
+function importoldchangelog_plugin_shutdown() {
+    global $conf;
+    $changelog = $conf['changelog'];
+    $importing = $conf['changelog'].'_importing';
+    $failed    = $conf['changelog'].'_failed';
+    $import_ok = $conf['changelog'].'_import_ok';
+
+    io_unlock($changelog); // guarantee unlocking
+
+    if (!file_exists($importing)) {
+        // import successful
+        touch($import_ok);
+        @unlink($failed);
+        return;
+    }
+
+    // import did not finish
+    rename($importing, $failed) or trigger_error('Importing changelog failed.', E_USER_WARNING);
+    @unlink($import_ok);
+}
+
+
diff --git a/lib/plugins/plugin/admin.php b/lib/plugins/plugin/admin.php
index ef142c4cc..2c47de665 100644
--- a/lib/plugins/plugin/admin.php
+++ b/lib/plugins/plugin/admin.php
@@ -23,7 +23,7 @@ require_once(DOKU_PLUGIN.'admin.php');
// plugins that are an integral part of dokuwiki, they shouldn't be disabled or deleted
global $plugin_protected;
- $plugin_protected = array('acl','plugin','config','info','usermanager');
+ $plugin_protected = array('acl','plugin','config','info','usermanager', 'importoldchangelog');
/**
* All DokuWiki plugins to extend the admin function