diff options
-rw-r--r-- | _test/cases/inc/pageutils_clean_id.test.php | 137 | ||||
-rw-r--r-- | _test/index.php | 2 | ||||
-rw-r--r-- | inc/parser/lexer.php | 17 | ||||
-rw-r--r-- | inc/utf8.php | 4 |
4 files changed, 149 insertions, 11 deletions
diff --git a/_test/cases/inc/pageutils_clean_id.test.php b/_test/cases/inc/pageutils_clean_id.test.php new file mode 100644 index 000000000..0b4f51571 --- /dev/null +++ b/_test/cases/inc/pageutils_clean_id.test.php @@ -0,0 +1,137 @@ +<?php +require_once DOKU_INC.'inc/utf8.php'; +require_once DOKU_INC.'inc/pageutils.php'; + +class init_clean_id_test extends UnitTestCase { + + + function test_default(){ + // we test multiple cases here + // format: $id, $ascii, $correct_output + $tests = array(); + + // set dokuwiki defaults + global $conf; + $conf['sepchar'] = '_'; + $conf['deaccent'] = 1; + + $tests[] = array('page',false,'page'); + $tests[] = array('pa_ge',false,'pa_ge'); + $tests[] = array('pa%ge',false,'pa_ge'); + $tests[] = array('pa#ge',false,'pa_ge'); + $tests[] = array('pàge',false,'page'); + $tests[] = array('pagĖ',false,'page'); + $tests[] = array('pa$%^*#ge',false,'pa_ge'); + $tests[] = array('*page*',false,'page'); + $tests[] = array('ښ',false,'ښ'); + $tests[] = array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ'); + $tests[] = array('page:page',false,'page:page'); + $tests[] = array('page;page',false,'page:page'); + + $conf['useslash'] = 0; + $tests[] = array('page/page',false,'page_page'); + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + + $conf['useslash'] = 1; + $tests = array(); + $tests[] = array('page/page',false,'page:page'); + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + } + + function test_sepchar(){ + // we test multiple cases here + // format: $id, $ascii, $correct_output + $tests = array(); + + global $conf; + $conf['sepchar'] = '-'; + $conf['deaccent'] = 1; + + $tests[] = array('pa-ge',false,'pa-ge'); + $tests[] = array('pa%ge',false,'pa-ge'); + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + } + + function test_deaccent_keep(){ + // we test multiple cases here + // format: $id, $ascii, $correct_output + $tests = array(); + + global $conf; + $conf['sepchar'] = '_'; + $conf['deaccent'] = 0; + + $tests[] = array('pàge',false,'pàge'); + $tests[] = array('pagĖ',false,'pagė'); + $tests[] = array('pagĒēĔĕĖėĘęĚě',false,'pagēēĕĕėėęęěě'); + $tests[] = array('ښ',false,'ښ'); + $tests[] = array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ'); + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + } + + function test_deaccent_romanize(){ + // we test multiple cases here + // format: $id, $ascii, $correct_output + $tests = array(); + + global $conf; + $conf['sepchar'] = '_'; + $conf['deaccent'] = 2; + + $tests[] = array('pàge',false,'page'); + $tests[] = array('pagĖ',false,'page'); + $tests[] = array('pagĒēĔĕĖėĘęĚě',false,'pageeeeeeeeee'); + $tests[] = array('ښ',false,'ښ'); + $tests[] = array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ'); + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + } + + function test_deaccent_ascii(){ + // we test multiple cases here + // format: $id, $ascii, $correct_output + $tests = array(); + + global $conf; + $conf['sepchar'] = '_'; + $conf['deaccent'] = 0; + + $tests[] = array('pàge',true,'page'); + $tests[] = array('pagĖ',true,'page'); + $tests[] = array('pagĒēĔĕĖėĘęĚě',true,'pageeeeeeeeee'); + $tests[] = array('ښ',true,''); + $tests[] = array('ښ侧化并곦ঝഈβ',true,''); + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + + $conf['deaccent'] = 1; + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + + $conf['deaccent'] = 2; + + foreach($tests as $test){ + $this->assertEqual(cleanID($test[0],$test[1]),$test[2]); + } + } + +} +//Setup VIM: ex: et ts=4 enc=utf-8 : diff --git a/_test/index.php b/_test/index.php index 99fae4b95..87cc10a35 100644 --- a/_test/index.php +++ b/_test/index.php @@ -49,7 +49,7 @@ function & DW_TESTS_GetReporter() { break; case DW_TESTS_OUTPUT_HTML: default: - $Reporter = new HTMLReporter(); + $Reporter = new HTMLReporter('utf-8'); break; } } diff --git a/inc/parser/lexer.php b/inc/parser/lexer.php index 9c109f9bb..2175e6786 100644 --- a/inc/parser/lexer.php +++ b/inc/parser/lexer.php @@ -503,15 +503,16 @@ class Doku_Lexer { if (isset($this->_mode_handlers[$handler])) { $handler = $this->_mode_handlers[$handler]; } - // modes starting with plugin_ are all handled by the same - // handler but with an additional parameter - if(substr($handler,0,7)=='plugin_'){ - list($handler,$plugin) = split('_',$handler,2); - return $this->_parser->$handler($content, $is_match, $pos, $plugin); - } - return $this->_parser->$handler($content, $is_match, $pos); - } + // modes starting with plugin_ are all handled by the same + // handler but with an additional parameter + if(substr($handler,0,7)=='plugin_'){ + list($handler,$plugin) = split('_',$handler,2); + return $this->_parser->$handler($content, $is_match, $pos, $plugin); + } + + return $this->_parser->$handler($content, $is_match, $pos); + } /** * Tries to match a chunk of text and if successful diff --git a/inc/utf8.php b/inc/utf8.php index cf2ff3b2a..ee8b4fc13 100644 --- a/inc/utf8.php +++ b/inc/utf8.php @@ -820,7 +820,7 @@ $UTF8_LOWER_ACCENTS = array( 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', - 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', + 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', ); /** @@ -848,7 +848,7 @@ $UTF8_UPPER_ACCENTS = array( 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', - 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', + 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', ); /** |