4 files changed, 149 insertions, 11 deletions
diff --git a/_test/cases/inc/pageutils_clean_id.test.php b/_test/cases/inc/pageutils_clean_id.test.php
new file mode 100644
index 000000000..0b4f51571
--- /dev/null
+++ b/_test/cases/inc/pageutils_clean_id.test.php
@@ -0,0 +1,137 @@
+<?php
+require_once DOKU_INC.'inc/utf8.php';
+require_once DOKU_INC.'inc/pageutils.php';
+
+/** Tests for cleanID() under different sepchar, deaccent and useslash settings */
+class init_clean_id_test extends UnitTestCase {
+    /** copy of the global $conf taken in setUp(), written back in tearDown() */
+    var $_saved_conf = null;
+
+    /** Snapshot the global configuration before every test method */
+    function setUp(){
+        global $conf;
+        $this->_saved_conf = $conf;
+    }
+
+    /**
+     * Restore the snapshot so settings changed by a test (sepchar, deaccent,
+     * useslash) do not leak into whatever test runs next
+     */
+    function tearDown(){
+        global $conf;
+        $conf = $this->_saved_conf;
+    }
+
+    /**
+     * Feed every case to cleanID() and compare with the expected result
+     *
+     * @param array $tests list of array($id, $ascii, $correct_output)
+     */
+    function _assertCleanIDs($tests){
+        foreach($tests as $test){
+            $this->assertEqual(cleanID($test[0],$test[1]),$test[2]);
+        }
+    }
+
+    /**
+     * DokuWiki default settings, plus the effect of useslash on '/'
+     */
+    function test_default(){
+        global $conf;
+        $conf['sepchar']  = '_';
+        $conf['deaccent'] = 1;
+        $conf['useslash'] = 0;
+
+        $tests   = array();
+        $tests[] = array('page',false,'page');
+        $tests[] = array('pa_ge',false,'pa_ge');
+        $tests[] = array('pa%ge',false,'pa_ge');
+        $tests[] = array('pa#ge',false,'pa_ge');
+        $tests[] = array('pàge',false,'page');
+        $tests[] = array('pagĖ',false,'page');
+        $tests[] = array('pa$%^*#ge',false,'pa_ge');
+        $tests[] = array('*page*',false,'page');
+        $tests[] = array('ښ',false,'ښ');
+        $tests[] = array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ');
+        $tests[] = array('page:page',false,'page:page');
+        $tests[] = array('page;page',false,'page:page');
+        $tests[] = array('page/page',false,'page_page');
+        $this->_assertCleanIDs($tests);
+
+        // with useslash enabled a slash is expected to become a namespace colon
+        $conf['useslash'] = 1;
+        $tests   = array();
+        $tests[] = array('page/page',false,'page:page');
+        $this->_assertCleanIDs($tests);
+    }
+
+    /**
+     * A non-default sepchar is expected to be kept and used as replacement
+     */
+    function test_sepchar(){
+        global $conf;
+        $conf['sepchar']  = '-';
+        $conf['deaccent'] = 1;
+
+        $tests   = array();
+        $tests[] = array('pa-ge',false,'pa-ge');
+        $tests[] = array('pa%ge',false,'pa-ge');
+        $this->_assertCleanIDs($tests);
+    }
+
+    /**
+     * deaccent = 0: accented letters are expected to be kept, only lowercased
+     */
+    function test_deaccent_keep(){
+        global $conf;
+        $conf['sepchar']  = '_';
+        $conf['deaccent'] = 0;
+
+        $tests   = array();
+        $tests[] = array('pàge',false,'pàge');
+        $tests[] = array('pagĖ',false,'pagė');
+        $tests[] = array('pagĒēĔĕĖėĘęĚě',false,'pagēēĕĕėėęęěě');
+        $tests[] = array('ښ',false,'ښ');
+        $tests[] = array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ');
+        $this->_assertCleanIDs($tests);
+    }
+
+    /**
+     * deaccent = 2: accented Latin letters are expected as plain ASCII letters
+     */
+    function test_deaccent_romanize(){
+        global $conf;
+        $conf['sepchar']  = '_';
+        $conf['deaccent'] = 2;
+
+        $tests   = array();
+        $tests[] = array('pàge',false,'page');
+        $tests[] = array('pagĖ',false,'page');
+        $tests[] = array('pagĒēĔĕĖėĘęĚě',false,'pageeeeeeeeee');
+        $tests[] = array('ښ',false,'ښ');
+        $tests[] = array('ښ侧化并곦ঝഈβ',false,'ښ侧化并곦ঝഈ');
+        $this->_assertCleanIDs($tests);
+    }
+
+    /**
+     * $ascii = true: the same output is expected for every deaccent setting
+     */
+    function test_deaccent_ascii(){
+        global $conf;
+        $conf['sepchar'] = '_';
+
+        $tests   = array();
+        $tests[] = array('pàge',true,'page');
+        $tests[] = array('pagĖ',true,'page');
+        $tests[] = array('pagĒēĔĕĖėĘęĚě',true,'pageeeeeeeeee');
+        $tests[] = array('ښ',true,'');
+        $tests[] = array('ښ侧化并곦ঝഈβ',true,'');
+
+        foreach(array(0,1,2) as $deaccent){
+            $conf['deaccent'] = $deaccent;
+            $this->_assertCleanIDs($tests);
+        }
+    }
+
+}
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/_test/index.php b/_test/index.php
index 99fae4b95..87cc10a35 100644
--- a/_test/index.php
+++ b/_test/index.php
@@ -49,7 +49,7 @@ function & DW_TESTS_GetReporter() {
             break;
             case DW_TESTS_OUTPUT_HTML:
             default:
-                $Reporter = new HTMLReporter();
+                $Reporter = new HTMLReporter('utf-8');
             break;
         }
     }
diff --git a/inc/parser/lexer.php b/inc/parser/lexer.php
index 9c109f9bb..2175e6786 100644
--- a/inc/parser/lexer.php
+++ b/inc/parser/lexer.php
@@ -503,15 +503,16 @@ class Doku_Lexer {
         if (isset($this->_mode_handlers[$handler])) {
             $handler = $this->_mode_handlers[$handler];
         }
-    // modes starting with plugin_ are all handled by the same
-    // handler but with an additional parameter
-    if(substr($handler,0,7)=='plugin_'){
-      list($handler,$plugin) = split('_',$handler,2);
-          return $this->_parser->$handler($content, $is_match, $pos, $plugin);
-    }
 
-        return $this->_parser->$handler($content, $is_match, $pos);
-    }
+        // modes starting with plugin_ are all handled by the same
+        // handler but with an additional parameter
+        if(substr($handler,0,7)=='plugin_'){
+          list($handler,$plugin) = split('_',$handler,2);
+              return $this->_parser->$handler($content, $is_match, $pos, $plugin);
+        }
+
+            return $this->_parser->$handler($content, $is_match, $pos);
+        }
 
     /**
      *    Tries to match a chunk of text and if successful
diff --git a/inc/utf8.php b/inc/utf8.php
index cf2ff3b2a..ee8b4fc13 100644
--- a/inc/utf8.php
+++ b/inc/utf8.php
@@ -820,7 +820,7 @@ $UTF8_LOWER_ACCENTS = array(
   'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
   'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
   'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
-  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
+  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 
 );
 
 /**
@@ -848,7 +848,7 @@ $UTF8_UPPER_ACCENTS = array(
   'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
   'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
   'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
-  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
+  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E',
 );
 
 /**