summaryrefslogtreecommitdiff
path: root/inc/utf8.php
diff options
context:
space:
mode:
author: Tom N Harris <tnharris@whoopdedo.org> 2006-11-17 13:30:32 +0100
committer: Tom N Harris <tnharris@whoopdedo.org> 2006-11-17 13:30:32 +0100
commit: d5b233026fa2930e4c6ee362ebdfd3586b2f3fb8 (patch)
tree: d706907a5106ec596279c0655f891bb698203d29 /inc/utf8.php
parent: 9046bb990b22efdc8a26f2b002f603c22f73f038 (diff)
download: rpg-d5b233026fa2930e4c6ee362ebdfd3586b2f3fb8.tar.gz
download: rpg-d5b233026fa2930e4c6ee362ebdfd3586b2f3fb8.tar.bz2
Indexer asian language fixes and speed-ups
Make Chinese and Japanese work better with the new indexer. Some missing punctuation was added to utf8_stripspecials, along with miscellaneous other changes to make indexing faster. The indexes will expire on backend upgrades, so you don't have to delete the *.indexed files.
darcs-hash:20061117123032-6942e-774b38e08234928c49b37e40addba375acf67ac0.gz
Diffstat (limited to 'inc/utf8.php')
-rw-r--r-- inc/utf8.php 19
1 files changed, 16 insertions, 3 deletions
diff --git a/inc/utf8.php b/inc/utf8.php
index e32b64b17..8da981fd9 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -1027,11 +1027,20 @@ $UTF8_SPECIAL_CHARS = array(
0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
- 0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
+ 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c,
+ 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017,
+ 0x3018, 0x3019, 0x301a, 0x301b, 0x3036,
+ 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
+ 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09,
+ 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c,
+ 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b,
+ 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65,
+ 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea,
+ 0xffeb, 0xffec, 0xffed, 0xffee,
);
// utf8 version of above data
@@ -1051,8 +1060,12 @@ $UTF8_SPECIAL_CHARS2 =
'✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'.
'�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'.
'��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'.
- '➷➸➹➺➻➼➽➾�'.
- '�ﹼﹽ';
+ '➷➸➹➺➻➼➽➾'.
+ ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'.
+ '�'.
+ '�ﹼﹽ'.
+ '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'.
+ '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○';
/**
* Romanization lookup table