diff options
author | Michael Hamann <michael@content-space.de> | 2010-11-15 22:36:26 +0100 |
---|---|---|
committer | Michael Hamann <michael@content-space.de> | 2010-11-15 22:36:26 +0100 |
commit | 4753bcc0e2fd9417e885e128e8c9ab4bfc566c32 (patch) | |
tree | 9b24c39acab8046d902b64b9f5b25812ac3a7f29 /inc/indexer.php | |
parent | e5e503830f067ce7305e22eac58c78c2f4a007d2 (diff) | |
download | rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.gz rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.bz2 |
Indexer improvement: regex instead of arrays for lines
When updating a single line, that line was split into an array; in a
loop over that array one entry was removed, and afterwards a new one was
added. Tests have shown that using a regex for this is much faster,
which is easily explained: the regex is very simple to match,
while a loop over an array is comparatively slow. As that update function is
called for every word in a page, the impact of this change is
significant.
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- | inc/indexer.php | 27 |
1 files changed, 10 insertions, 17 deletions
diff --git a/inc/indexer.php b/inc/indexer.php index 8174f73d0..954512673 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -391,26 +391,19 @@ function idx_writeIndexLine($fh,$line,$pid,$count){ * @author Andreas Gohr <andi@splitbrain.org> */ function idx_updateIndexLine($line,$pid,$count){ - $line = trim($line); - $updated = array(); - if($line != ''){ - $parts = explode(':',$line); - // remove doc from given line - foreach($parts as $part){ - if($part == '') continue; - list($doc,$cnt) = explode('*',$part); - if($doc != $pid){ - $updated[] = $part; - } - } + if ($line == ''){ + $newLine = "\n"; + }else{ + $newLine = preg_replace('/(^|:)'.preg_quote($pid, '/').'\*\d*/', '', $line); } - - // add doc if ($count){ - $updated[] = "$pid*$count"; + if (strlen($newLine) > 1){ + return "$pid*$count:".$newLine; + }else{ + return "$pid*$count".$newLine; + } } - - return join(':',$updated)."\n"; + return $newLine; } /** |