diff options
author | Tom N Harris <tnharris@whoopdedo.org> | 2010-11-14 14:22:08 -0500 |
---|---|---|
committer | Tom N Harris <tnharris@whoopdedo.org> | 2010-11-14 14:22:08 -0500 |
commit | 4b9792c696658fe0cbedc187198fa463b6ff83fc (patch) | |
tree | 74f84d2fef8f7fddc4f08f743788a3f83be2b70a | |
parent | ee0891d8ffd7e4a59c958b9546a3b8382e4e5991 (diff) | |
download | rpg-4b9792c696658fe0cbedc187198fa463b6ff83fc.tar.gz rpg-4b9792c696658fe0cbedc187198fa463b6ff83fc.tar.bz2 |
Measure length of multi-character Asian words
-rw-r--r-- | inc/indexer.php | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 7a8bb3ff8..d9eccac76 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -52,8 +52,10 @@ function wordlen($w){
     $l = strlen($w);
     // If left alone, all chinese "words" will get put into w3.idx
     // So the "length" of a "word" is faked
-    if(preg_match('/'.IDX_ASIAN2.'/u',$w))
-        $l += ord($w) - 0xE1; // Lead bytes from 0xE2-0xEF
+    if(preg_match_all('/[\xE2-\xEF]/',$w,$leadbytes)) {
+        foreach($leadbytes[0] as $b)
+            $l += ord($b) - 0xE1;
+    }
     return $l;
 }