summary | refs | log | tree | commit | diff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author: Michael Hamann <michael@content-space.de> 2010-11-15 22:36:26 +0100
committer: Michael Hamann <michael@content-space.de> 2010-11-15 22:36:26 +0100
commit: 4753bcc0e2fd9417e885e128e8c9ab4bfc566c32 (patch)
tree: 9b24c39acab8046d902b64b9f5b25812ac3a7f29 /inc/indexer.php
parent: e5e503830f067ce7305e22eac58c78c2f4a007d2 (diff)
download: rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.gz
download: rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.bz2
Indexer improvement: regex instead of arrays for lines
When updating a single line, that line was split into an array; in a loop over that array one entry was removed, and afterwards a new one was added. Tests have shown that using a regex for this is much faster, which is easily explained: the regex is very simple to match, while a loop over an array isn't that fast. As that update function is called for every word in a page, the impact of this change is significant.
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 27
1 files changed, 10 insertions, 17 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 8174f73d0..954512673 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -391,26 +391,19 @@ function idx_writeIndexLine($fh,$line,$pid,$count){
* @author Andreas Gohr <andi@splitbrain.org>
*/
function idx_updateIndexLine($line,$pid,$count){
- $line = trim($line);
- $updated = array();
- if($line != ''){
- $parts = explode(':',$line);
- // remove doc from given line
- foreach($parts as $part){
- if($part == '') continue;
- list($doc,$cnt) = explode('*',$part);
- if($doc != $pid){
- $updated[] = $part;
- }
- }
+ if ($line == ''){
+ $newLine = "\n";
+ }else{
+ $newLine = preg_replace('/(^|:)'.preg_quote($pid, '/').'\*\d*/', '', $line);
}
-
- // add doc
if ($count){
- $updated[] = "$pid*$count";
+ if (strlen($newLine) > 1){
+ return "$pid*$count:".$newLine;
+ }else{
+ return "$pid*$count".$newLine;
+ }
}
-
- return join(':',$updated)."\n";
+ return $newLine;
}
/**