summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom N Harris <tnharris@whoopdedo.org> 2010-11-14 14:22:08 -0500
committer Tom N Harris <tnharris@whoopdedo.org> 2010-11-14 14:22:08 -0500
commit 4b9792c696658fe0cbedc187198fa463b6ff83fc (patch)
tree 74f84d2fef8f7fddc4f08f743788a3f83be2b70a
parent ee0891d8ffd7e4a59c958b9546a3b8382e4e5991 (diff)
download rpg-4b9792c696658fe0cbedc187198fa463b6ff83fc.tar.gz
rpg-4b9792c696658fe0cbedc187198fa463b6ff83fc.tar.bz2
Measure length of multi-character Asian words
-rw-r--r-- inc/indexer.php 6
1 files changed, 4 insertions, 2 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 7a8bb3ff8..d9eccac76 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -52,8 +52,10 @@ function wordlen($w){
$l = strlen($w);
// If left alone, all chinese "words" will get put into w3.idx
// So the "length" of a "word" is faked
- if(preg_match('/'.IDX_ASIAN2.'/u',$w))
- $l += ord($w) - 0xE1; // Lead bytes from 0xE2-0xEF
+ if(preg_match_all('/[\xE2-\xEF]/',$w,$leadbytes)) {
+ foreach($leadbytes[0] as $b)
+ $l += ord($b) - 0xE1;
+ }
return $l;
}