From 4efb9a42baed214da00a12a58c72586499dafc95 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 18 Jun 2006 15:45:15 +0200 Subject: fixed stupid bug in search query parser darcs-hash:20060618134515-7ad00-3097e310ccdaf793b5da3bd49a54723fea7ec260.gz --- _test/cases/inc/utf8_stripspecials.test.php | 27 +++++++++++++++++++++++++++ inc/indexer.php | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 _test/cases/inc/utf8_stripspecials.test.php diff --git a/_test/cases/inc/utf8_stripspecials.test.php b/_test/cases/inc/utf8_stripspecials.test.php new file mode 100644 index 000000000..a42507a3c --- /dev/null +++ b/_test/cases/inc/utf8_stripspecials.test.php @@ -0,0 +1,27 @@ +assertEqual(utf8_stripspecials($test[0],$test[1],$test[2]),$test[3]); + } + } + +} +//Setup VIM: ex: et ts=4 enc=utf-8 : diff --git a/inc/indexer.php b/inc/indexer.php index 789a98d91..4a5d0b4e2 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -357,14 +357,14 @@ function idx_parseIndexLine(&$page_idx,$line){ */ function idx_tokenizer($string,&$stopwords,$wc=false){ $words = array(); - if(!$wc) $wc = '\*'; + $wc = ($wc) ? '' : $wc = '\*'; if(preg_match('/[^0-9A-Za-z]/u', $string)){ // handle asian chars as single words (may fail on older PHP version) $asia = @preg_replace('/('.IDX_ASIAN.')/u','\1 ',$string); if(!is_null($asia)) $string = $asia; //recover from regexp failure - $arr = explode(' ', utf8_stripspecials($string,' ','._\-:'.$wc)); + $arr = explode(' ', utf8_stripspecials($string,' ','\._\-:'.$wc)); foreach ($arr as $w) { if (!is_numeric($w) && strlen($w) < 3) continue; $w = utf8_strtolower($w); -- cgit v1.2.3