fixed stupid bug in search query parser

darcs-hash:20060618134515-7ad00-3097e310ccdaf793b5da3bd49a54723fea7ec260.gz
author: Andreas Gohr <andi@splitbrain.org> 2006-06-18 15:45:15 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2006-06-18 15:45:15 +0200
commit: 4efb9a42baed214da00a12a58c72586499dafc95 (patch)
tree: 649b9cdeb49f7c0ee3474b763f8aeabdd5d58dce
parent: 377f9e9776677a6d3963718bfd48bba361571dfb (diff)
download: rpg-4efb9a42baed214da00a12a58c72586499dafc95.tar.gz
rpg-4efb9a42baed214da00a12a58c72586499dafc95.tar.bz2
2 files changed, 29 insertions, 2 deletions
diff --git a/_test/cases/inc/utf8_stripspecials.test.php b/_test/cases/inc/utf8_stripspecials.test.php
new file mode 100644
index 000000000..a42507a3c
--- /dev/null
+++ b/_test/cases/inc/utf8_stripspecials.test.php
@@ -0,0 +1,27 @@
+<?php
+// use no mbstring help here
+if(!defined('UTF8_NOMBSTRING')) define('UTF8_NOMBSTRING',1);
+require_once DOKU_INC.'inc/utf8.php';
+
+class utf8_stripspecials extends UnitTestCase {
+
+
+    function test1(){
+        // we test multiple cases here - format: string, repl, additional, expected result
+        $tests   = array();
+        $tests[] = array('asciistring','','','asciistring');
+        $tests[] = array('asciistring','','\._\-:','asciistring');
+        $tests[] = array('ascii.string','','\._\-:','asciistring');
+        $tests[] = array('ascii.string',' ','\._\-:','ascii string');
+        $tests[] = array('2.1.14',' ','\._\-:','2 1 14');
+        $tests[] = array('ascii.string','','\._\-:\*','asciistring');
+        $tests[] = array('ascii.string',' ','\._\-:\*','ascii string');
+        $tests[] = array('2.1.14',' ','\._\-:\*','2 1 14');
+
+        foreach($tests as $test){
+            $this->assertEqual(utf8_stripspecials($test[0],$test[1],$test[2]),$test[3]);
+        }
+    }
+
+}
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/indexer.php b/inc/indexer.php
index 789a98d91..4a5d0b4e2 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -357,14 +357,14 @@ function idx_parseIndexLine(&$page_idx,$line){
  */
 function idx_tokenizer($string,&$stopwords,$wc=false){
     $words = array();
-    if(!$wc) $wc = '\*';
+    $wc = ($wc) ? '' : $wc = '\*';
 
     if(preg_match('/[^0-9A-Za-z]/u', $string)){
         // handle asian chars as single words (may fail on older PHP version)
         $asia = @preg_replace('/('.IDX_ASIAN.')/u','\1 ',$string);
         if(!is_null($asia)) $string = $asia; //recover from regexp failure
 
-        $arr = explode(' ', utf8_stripspecials($string,' ','._\-:'.$wc));
+        $arr = explode(' ', utf8_stripspecials($string,' ','\._\-:'.$wc));
         foreach ($arr as $w) {
             if (!is_numeric($w) && strlen($w) < 3) continue;
             $w = utf8_strtolower($w);
author	Andreas Gohr <andi@splitbrain.org>	2006-06-18 15:45:15 +0200
committer	Andreas Gohr <andi@splitbrain.org>	2006-06-18 15:45:15 +0200
commit	4efb9a42baed214da00a12a58c72586499dafc95 (patch)
tree	649b9cdeb49f7c0ee3474b763f8aeabdd5d58dce
parent	377f9e9776677a6d3963718bfd48bba361571dfb (diff)
download	rpg-4efb9a42baed214da00a12a58c72586499dafc95.tar.gz rpg-4efb9a42baed214da00a12a58c72586499dafc95.tar.bz2