From 8a831f2bba632ebf9e24d0f2f407c5b42cebffe5 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Fri, 10 Feb 2006 21:06:27 +0100 Subject: romanization support in utf8 library This patch adds basic romanization support to the utf-8 library. It converts non-Latin languages to ASCII. The transliteration tables used were gathered from various places on the net. I do not speak any of those languages so I can't say how good they are. Any recommendations and fixes are welcome! This can be enabled for ID cleaning by setting the deaccent option to 2. It is also used in the XHTML renderer to generate section ids based on the header titles. Leading digits and any remaining non-ASCII chars are removed as well. This is the first step to make section IDs always XHTML compatible. Making sure they are unique is not implemented yet. darcs-hash:20060210200627-7ad00-61a633563bb92a00ef4a3f699d73117139cbf367.gz --- inc/parser/xhtml.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'inc/parser') diff --git a/inc/parser/xhtml.php b/inc/parser/xhtml.php index 31246125b..5e898ab36 100644 --- a/inc/parser/xhtml.php +++ b/inc/parser/xhtml.php @@ -967,8 +967,14 @@ class Doku_Renderer_xhtml extends Doku_Renderer { return htmlspecialchars($string); } + /** + * Creates a linkid from a headline + */ function _headerToLink($title) { - return str_replace(':','',cleanID($title)); + $title = str_replace(':','',cleanID($title,true)); + $title = ltrim($title,'0123456789'); + if(empty($title)) $title='section'; + return $title; } /** -- cgit v1.2.3