summaryrefslogtreecommitdiff
path: root/inc
diff options
context:
space:
mode:
author: Andreas Gohr <andi@splitbrain.org> 2013-05-05 14:38:10 +0200
committer: Andreas Gohr <andi@splitbrain.org> 2013-05-05 14:38:10 +0200
commit: 6db7468b987c4b5f9bcfdd7e98ceb1883c49a364 (patch)
tree: 466b9cb7e5c224990c20a846ab7134a2ace97dc7 /inc
parent: 46ac8ef9ac8c7465449cd3d33c21148c75241ac8 (diff)
download: rpg-6db7468b987c4b5f9bcfdd7e98ceb1883c49a364.tar.gz
rpg-6db7468b987c4b5f9bcfdd7e98ceb1883c49a364.tar.bz2
make sure loaded text has the right encoding
When pages contain non-UTF-8 characters (e.g. when posted through a script or when edited on the filesystem), parts of DokuWiki can break, resulting in missing page content. This fixes these problems by forcing the content to UTF-8 on load. This will result in bad characters for input that is not latin1, but the contents will at least be visible.
Diffstat (limited to 'inc')
-rw-r--r-- inc/common.php 8
1 files changed, 8 insertions, 0 deletions
diff --git a/inc/common.php b/inc/common.php
index 27f90b53b..110350951 100644
--- a/inc/common.php
+++ b/inc/common.php
@@ -781,11 +781,19 @@ function unlock($id) {
/**
* convert line ending to unix format
*
+ * also makes sure the given text is valid UTF-8
+ *
* @see formText() for 2crlf conversion
* @author Andreas Gohr <andi@splitbrain.org>
*/
function cleanText($text) {
$text = preg_replace("/(\015\012)|(\015)/", "\012", $text);
+
+ // if the text is not valid UTF-8 we simply assume latin1
+ // this won't break any worse than it breaks with the wrong encoding
+ // but might actually fix the problem in many cases
+ if(!utf8_check($text)) $text = utf8_encode($text);
+
return $text;
}