diff options
author | Andreas Gohr <andi@splitbrain.org> | 2013-05-05 14:38:10 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2013-05-05 14:38:10 +0200 |
commit | 6db7468b987c4b5f9bcfdd7e98ceb1883c49a364 (patch) | |
tree | 466b9cb7e5c224990c20a846ab7134a2ace97dc7 | |
parent | 46ac8ef9ac8c7465449cd3d33c21148c75241ac8 (diff) | |
download | rpg-6db7468b987c4b5f9bcfdd7e98ceb1883c49a364.tar.gz rpg-6db7468b987c4b5f9bcfdd7e98ceb1883c49a364.tar.bz2 |
make sure loaded text has the right encoding
When pages contain non-UTF-8 characters (e.g. when posted through a script or
when edited on the filesystem), parts of DokuWiki can break, resulting in
missing page content. This fixes these problems by forcing the content
to UTF-8 on load. This will result in bad characters for input that is
not latin1, but the contents will at least be visible.
-rw-r--r-- | inc/common.php | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/inc/common.php b/inc/common.php
index 27f90b53b..110350951 100644
--- a/inc/common.php
+++ b/inc/common.php
@@ -781,11 +781,19 @@ function unlock($id) {
 /**
  * convert line ending to unix format
  *
+ * also makes sure the given text is valid UTF-8
+ *
  * @see formText() for 2crlf conversion
  * @author Andreas Gohr <andi@splitbrain.org>
  */
 function cleanText($text) {
     $text = preg_replace("/(\015\012)|(\015)/", "\012", $text);
+
+    // if the text is not valid UTF-8 we simply assume latin1
+    // this won't break any worse than it breaks with the wrong encoding
+    // but might actually fix the problem in many cases
+    if(!utf8_check($text)) $text = utf8_encode($text);
+
     return $text;
 }
 