summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Gohr <andi@splitbrain.org> 2006-06-18 15:45:15 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2006-06-18 15:45:15 +0200
commit: 4efb9a42baed214da00a12a58c72586499dafc95 (patch)
tree: 649b9cdeb49f7c0ee3474b763f8aeabdd5d58dce
parent: 377f9e9776677a6d3963718bfd48bba361571dfb (diff)
download: rpg-4efb9a42baed214da00a12a58c72586499dafc95.tar.gz
download: rpg-4efb9a42baed214da00a12a58c72586499dafc95.tar.bz2
fixed stupid bug in search query parser
darcs-hash:20060618134515-7ad00-3097e310ccdaf793b5da3bd49a54723fea7ec260.gz
-rw-r--r-- _test/cases/inc/utf8_stripspecials.test.php 27
-rw-r--r-- inc/indexer.php 4
2 files changed, 29 insertions, 2 deletions
diff --git a/_test/cases/inc/utf8_stripspecials.test.php b/_test/cases/inc/utf8_stripspecials.test.php
new file mode 100644
index 000000000..a42507a3c
--- /dev/null
+++ b/_test/cases/inc/utf8_stripspecials.test.php
@@ -0,0 +1,27 @@
+<?php
+// use no mbstring help here
+if(!defined('UTF8_NOMBSTRING')) define('UTF8_NOMBSTRING',1);
+require_once DOKU_INC.'inc/utf8.php';
+
+class utf8_stripspecials extends UnitTestCase {
+
+
+ function test1(){
+ // we test multiple cases here - format: string, repl, additional, test
+ $tests = array();
+ $tests[] = array('asciistring','','','asciistring');
+ $tests[] = array('asciistring','','\._\-:','asciistring');
+ $tests[] = array('ascii.string','','\._\-:','asciistring');
+ $tests[] = array('ascii.string',' ','\._\-:','ascii string');
+ $tests[] = array('2.1.14',' ','\._\-:','2 1 14');
+ $tests[] = array('ascii.string','','\._\-:\*','asciistring');
+ $tests[] = array('ascii.string',' ','\._\-:\*','ascii string');
+ $tests[] = array('2.1.14',' ','\._\-:\*','2 1 14');
+
+ foreach($tests as $test){
+ $this->assertEqual(utf8_stripspecials($test[0],$test[1],$test[2]),$test[3]);
+ }
+ }
+
+}
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/indexer.php b/inc/indexer.php
index 789a98d91..4a5d0b4e2 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -357,14 +357,14 @@ function idx_parseIndexLine(&$page_idx,$line){
*/
function idx_tokenizer($string,&$stopwords,$wc=false){
$words = array();
- if(!$wc) $wc = '\*';
+ $wc = ($wc) ? '' : $wc = '\*';
if(preg_match('/[^0-9A-Za-z]/u', $string)){
// handle asian chars as single words (may fail on older PHP version)
$asia = @preg_replace('/('.IDX_ASIAN.')/u','\1 ',$string);
if(!is_null($asia)) $string = $asia; //recover from regexp failure
- $arr = explode(' ', utf8_stripspecials($string,' ','._\-:'.$wc));
+ $arr = explode(' ', utf8_stripspecials($string,' ','\._\-:'.$wc));
foreach ($arr as $w) {
if (!is_numeric($w) && strlen($w) < 3) continue;
$w = utf8_strtolower($w);