From c41c03f3ff20658df6ec6c2e7808c5051a802a70 Mon Sep 17 00:00:00 2001 From: andi Date: Mon, 17 Jan 2005 20:21:12 +0100 Subject: cleanID UTF8 patch darcs-hash:20050117192112-9977f-084e54a393e8ff0af3954e3a51b8072931529f9b.gz --- inc/common.php | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'inc/common.php') diff --git a/inc/common.php b/inc/common.php index eb7902106..ac5332a3e 100644 --- a/inc/common.php +++ b/inc/common.php @@ -8,6 +8,7 @@ require_once("conf/dokuwiki.php"); require_once("inc/io.php"); + require_once('inc/utf8.php'); //set up error reporting to sane values error_reporting(E_ALL ^ E_NOTICE); @@ -392,32 +393,41 @@ function cleanID($id){ global $conf; global $lang; $id = trim($id); - $id = strtolower($id); + $id = utf8_strtolower($id); //alternative namespace seperator $id = strtr($id,';',':'); if($conf['useslash']) $id = strtr($id,'/',':'); + //FIXME use config to ask for deaccenting + $id = utf8_deaccent($id,-1); + + //remove specials (only ascii specials are removed) + $id = preg_replace('#[ !"§$%&()\[\]{}\\?`\'\#~*+=,<>\|^°@µ¹²³¼½¬]#u','_',$id); + +/* DELETEME legacy code if(!$conf['localnames']){ if($lang['encoding'] == 'iso-8859-15'){ // replace accented chars with unaccented ones // this may look strange on your terminal - just don't touch $id = strtr( strtr($id, - '', + 'ŠŽšžŸÀÁÂÃÅÇÈÉÊËÌÍÎÏÑÒÓÔÕØÙÚÛÝàáâãåçèéêëìíîïñòóôõøùúûýÿ', 'szszyaaaaaceeeeiiiinooooouuuyaaaaaceeeeiiiinooooouuuyy'), - array('' => 'th', '' => 'th', '' => 'dh', '' => 'dh', '' => 'ss', - '' => 'oe', '' => 'oe', '' => 'ae', '' => 'ae', '' => 'u', - '' => 'ue', '' => 'oe', '' => 'ae', '' => 'ue', '' => '', - '' => 'ae')); + array('Þ' => 'th', 'þ' => 'th', 'Ð' => 'dh', 'ð' => 'dh', 'ß' => 'ss', + 'Œ' => 'oe', 'œ' => 'oe', 'Æ' => 'ae', 'æ' => 'ae', 'µ' => 'u', + 'ü' => 'ue', 'ö' => 'oe', 'ä' => 'ae', 'Ü' => 'ue', 'Ö' => 'ö', + 'Ä' => 'ae')); } $WORD = 'a-z'; }else{ $WORD = '\w'; } - //special chars left will be converted to _ $id = preg_replace('#[^'.$WORD.'0-9:\-\.]#','_',$id); +*/ + + //clean up $id = preg_replace('#__#','_',$id); $id = preg_replace('#:+#',':',$id); $id = trim($id,':._-'); -- cgit v1.2.3