Indexer improvement: regex instead of arrays for lines

When updating a single line, that line was split into an array; then, in a loop over that array, one entry was removed, and afterwards a new one was added. Tests have shown that using a regex for this is much faster, which is easily explained: the regex is very simple to match, while a loop over an array isn't that fast. As that update function is called for every word in a page, the impact of this change is significant.
author: Michael Hamann <michael@content-space.de> 2010-11-15 22:36:26 +0100
committer: Michael Hamann <michael@content-space.de> 2010-11-15 22:36:26 +0100
commit: 4753bcc0e2fd9417e885e128e8c9ab4bfc566c32 (patch)
tree: 9b24c39acab8046d902b64b9f5b25812ac3a7f29 /inc/indexer.php
parent: e5e503830f067ce7305e22eac58c78c2f4a007d2 (diff)
download: rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.gz
rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.bz2
1 files changed, 10 insertions, 17 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 8174f73d0..954512673 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -391,26 +391,19 @@ function idx_writeIndexLine($fh,$line,$pid,$count){
  * @author Andreas Gohr <andi@splitbrain.org>
  */
 function idx_updateIndexLine($line,$pid,$count){
-    $line = trim($line);
-    $updated = array();
-    if($line != ''){
-        $parts = explode(':',$line);
-        // remove doc from given line
-        foreach($parts as $part){
-            if($part == '') continue;
-            list($doc,$cnt) = explode('*',$part);
-            if($doc != $pid){
-                $updated[] = $part;
-            }
-        }
+    if ($line == ''){
+        $newLine = "\n";
+    }else{
+        $newLine = preg_replace('/(^|:)'.preg_quote($pid, '/').'\*\d*/', '', $line);
     }
-
-    // add doc
     if ($count){
-        $updated[] = "$pid*$count";
+        if (strlen($newLine) > 1){
+            return "$pid*$count:".$newLine;
+        }else{
+            return "$pid*$count".$newLine;
+        }
     }
-
-    return join(':',$updated)."\n";
+    return $newLine;
 }
 
 /**
author	Michael Hamann <michael@content-space.de>	2010-11-15 22:36:26 +0100
committer	Michael Hamann <michael@content-space.de>	2010-11-15 22:36:26 +0100
commit	4753bcc0e2fd9417e885e128e8c9ab4bfc566c32 (patch)
tree	9b24c39acab8046d902b64b9f5b25812ac3a7f29 /inc/indexer.php
parent	e5e503830f067ce7305e22eac58c78c2f4a007d2 (diff)
download	rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.gz rpg-4753bcc0e2fd9417e885e128e8c9ab4bfc566c32.tar.bz2