summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- _test/cases/inc/pageutils_clean_id.test.php 137
-rw-r--r-- _test/index.php 2
-rw-r--r-- inc/parser/lexer.php 17
-rw-r--r-- inc/utf8.php 4
4 files changed, 149 insertions, 11 deletions
diff --git a/_test/cases/inc/pageutils_clean_id.test.php b/_test/cases/inc/pageutils_clean_id.test.php
new file mode 100644
index 000000000..0b4f51571
--- /dev/null
+++ b/_test/cases/inc/pageutils_clean_id.test.php
@@ -0,0 +1,137 @@
+<?php
+require_once DOKU_INC.'inc/utf8.php';
+require_once DOKU_INC.'inc/pageutils.php';
+
+/**
+ * Tests for cleanID(): how page IDs are normalised under the different
+ * $conf['sepchar'], $conf['deaccent'] and $conf['useslash'] settings.
+ *
+ * Every case is a triple array($id, $ascii, $expected) and is asserted as
+ * cleanID($id, $ascii) == $expected.
+ */
+class init_clean_id_test extends UnitTestCase {
+
+    /** Copy of the global $conf, taken in setUp() and put back in tearDown() */
+    var $saved_conf = null;
+
+    /**
+     * Remember the global configuration so that settings changed by one
+     * test method do not leak into whatever runs next.
+     */
+    function setUp(){
+        global $conf;
+        $this->saved_conf = $conf;
+    }
+
+    /** Put back the configuration saved in setUp(). */
+    function tearDown(){
+        global $conf;
+        $conf = $this->saved_conf;
+    }
+
+    /**
+     * Assert every case against cleanID() (helper, not a test itself).
+     *
+     * @param array $cases list of array($id, $ascii, $expected)
+     */
+    function _checkCases($cases){
+        foreach($cases as $case){
+            list($id, $ascii, $expected) = $case;
+            $this->assertEqual(cleanID($id,$ascii),$expected);
+        }
+    }
+
+    /** DokuWiki defaults: '_' as separator and deaccent set to 1. */
+    function test_default(){
+        global $conf;
+        $conf['sepchar']  = '_';
+        $conf['deaccent'] = 1;
+        // set before any case is evaluated, so it applies to all of them
+        $conf['useslash'] = 0;
+
+        $this->_checkCases(array(
+            array('page',false,'page'),
+            array('pa_ge',false,'pa_ge'),
+            array('pa%ge',false,'pa_ge'),
+            array('pa#ge',false,'pa_ge'),
+            array('pàge',false,'page'),
+            array('pagĖ',false,'page'),
+            array('pa$%^*#ge',false,'pa_ge'),
+            array('*page*',false,'page'),
+            array('ښ',false,'ښ'),
+            array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ'),
+            array('page:page',false,'page:page'),
+            array('page;page',false,'page:page'),
+            array('page/page',false,'page_page'),
+        ));
+
+        // with useslash enabled the slash is expected to turn into ':'
+        $conf['useslash'] = 1;
+        $this->_checkCases(array(
+            array('page/page',false,'page:page'),
+        ));
+    }
+
+    /** With '-' as sepchar, replaced characters are expected as '-'. */
+    function test_sepchar(){
+        global $conf;
+        $conf['sepchar']  = '-';
+        $conf['deaccent'] = 1;
+
+        $this->_checkCases(array(
+            array('pa-ge',false,'pa-ge'),
+            array('pa%ge',false,'pa-ge'),
+        ));
+    }
+
+    /** deaccent = 0: accented characters are expected to survive, lowercased. */
+    function test_deaccent_keep(){
+        global $conf;
+        $conf['sepchar']  = '_';
+        $conf['deaccent'] = 0;
+
+        $this->_checkCases(array(
+            array('pàge',false,'pàge'),
+            array('pagĖ',false,'pagė'),
+            array('pagĒēĔĕĖėĘęĚě',false,'pagēēĕĕėėęęěě'),
+            array('ښ',false,'ښ'),
+            array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ'),
+        ));
+    }
+
+    /** deaccent = 2: accented latin letters are expected as plain ASCII. */
+    function test_deaccent_romanize(){
+        global $conf;
+        $conf['sepchar']  = '_';
+        $conf['deaccent'] = 2;
+
+        $this->_checkCases(array(
+            array('pàge',false,'page'),
+            array('pagĖ',false,'page'),
+            array('pagĒēĔĕĖėĘęĚě',false,'pageeeeeeeeee'),
+            array('ښ',false,'ښ'),
+            array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ'),
+        ));
+    }
+
+    /** $ascii = true: the same results are expected for deaccent 0, 1 and 2. */
+    function test_deaccent_ascii(){
+        global $conf;
+        $conf['sepchar'] = '_';
+
+        $tests = array(
+            array('pàge',true,'page'),
+            array('pagĖ',true,'page'),
+            array('pagĒēĔĕĖėĘęĚě',true,'pageeeeeeeeee'),
+            array('ښ',true,''),
+            array('ښ侧化并곦ঝഈβ',true,''),
+        );
+
+        foreach(array(0, 1, 2) as $mode){
+            $conf['deaccent'] = $mode;
+            $this->_checkCases($tests);
+        }
+    }
+
+}
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/_test/index.php b/_test/index.php
index 99fae4b95..87cc10a35 100644
--- a/_test/index.php
+++ b/_test/index.php
@@ -49,7 +49,7 @@ function & DW_TESTS_GetReporter() {
break;
case DW_TESTS_OUTPUT_HTML:
default:
- $Reporter = new HTMLReporter();
+ $Reporter = new HTMLReporter('utf-8');
break;
}
}
diff --git a/inc/parser/lexer.php b/inc/parser/lexer.php
index 9c109f9bb..2175e6786 100644
--- a/inc/parser/lexer.php
+++ b/inc/parser/lexer.php
@@ -503,15 +503,16 @@ class Doku_Lexer {
if (isset($this->_mode_handlers[$handler])) {
$handler = $this->_mode_handlers[$handler];
}
- // modes starting with plugin_ are all handled by the same
- // handler but with an additional parameter
- if(substr($handler,0,7)=='plugin_'){
- list($handler,$plugin) = split('_',$handler,2);
- return $this->_parser->$handler($content, $is_match, $pos, $plugin);
- }
- return $this->_parser->$handler($content, $is_match, $pos);
- }
+ // modes starting with plugin_ are all handled by the same
+ // handler but with an additional parameter
+ if(substr($handler,0,7)=='plugin_'){
+ list($handler,$plugin) = split('_',$handler,2);
+ return $this->_parser->$handler($content, $is_match, $pos, $plugin);
+ }
+
+ return $this->_parser->$handler($content, $is_match, $pos);
+ }
/**
* Tries to match a chunk of text and if successful
diff --git a/inc/utf8.php b/inc/utf8.php
index cf2ff3b2a..ee8b4fc13 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -820,7 +820,7 @@ $UTF8_LOWER_ACCENTS = array(
'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
- 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
+ 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e',
);
/**
@@ -848,7 +848,7 @@ $UTF8_UPPER_ACCENTS = array(
'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
- 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
+ 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E',
);
/**