From 176ae32b3f32b45a6ff8c81921aa9fbed2e28ace Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sat, 11 Oct 2008 11:10:34 +0200 Subject: Do romanization of certain characters differently from what deaccent does FS#1117 Some characters are deaccented/romanized differently in different languages; we now do it one way in deaccent and the other way in romanize. This gives the user a choice of which she prefers. (Currently affects a handful of Scandinavian letters.) darcs-hash:20081011091034-7ad00-08535e03639b0b0c634e2438609ac10545f14f48.gz --- _test/cases/inc/utf8_romanize.test.php | 10 ++++++++++ inc/utf8.php | 3 +++ 2 files changed, 13 insertions(+) diff --git a/_test/cases/inc/utf8_romanize.test.php b/_test/cases/inc/utf8_romanize.test.php index 28595b594..9aade570b 100644 --- a/_test/cases/inc/utf8_romanize.test.php +++ b/_test/cases/inc/utf8_romanize.test.php @@ -22,5 +22,15 @@ class utf8_romanize_test extends UnitTestCase { $line++; } } + + /** + * Test romanization of character that would usually be deaccented in a different + * way FS#1117 + * + * @author Andreas Gohr + */ + function test_deaccented(){ + $this->assertEqual("a A a A a o O",utf8_romanize("å Å ä Ä ä ö Ö")); + } } //Setup VIM: ex: et ts=4 enc=utf-8 : diff --git a/inc/utf8.php b/inc/utf8.php index 99e2fcaa1..966c40c6c 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -1184,6 +1184,9 @@ $UTF8_SPECIAL_CHARS2 = */ global $UTF8_ROMANIZATION; $UTF8_ROMANIZATION = array( + // scandinavian - differs from what we do in deaccent + 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', + //russian cyrillic 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', -- cgit v1.2.3