From 6db7468b987c4b5f9bcfdd7e98ceb1883c49a364 Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Sun, 5 May 2013 14:38:10 +0200 Subject: make sure loaded text has the right encoding When pages contain non-UTF8 chars (e.g. when posted through a script or when edited on the filesystem), parts of DokuWiki can break, resulting in missing page content. This fixes these problems by forcing the content to UTF-8 on load. This will result in bad characters for input that is not latin1, but contents will at least be visible. --- inc/common.php | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'inc') diff --git a/inc/common.php b/inc/common.php index 27f90b53b..110350951 100644 --- a/inc/common.php +++ b/inc/common.php @@ -781,11 +781,19 @@ function unlock($id) { /** * convert line ending to unix format * + * also makes sure the given text is valid UTF-8 + * * @see formText() for 2crlf conversion * @author Andreas Gohr */ function cleanText($text) { $text = preg_replace("/(\015\012)|(\015)/", "\012", $text); + + // if the text is not valid UTF-8 we simply assume latin1 + // this won't break any worse than it breaks with the wrong encoding + // but might actually fix the problem in many cases + if(!utf8_check($text)) $text = utf8_encode($text); + return $text; } -- cgit v1.2.3