summary refs log tree commit diff
path: root/inc
diff options
context:
space:
mode:
author Christopher Smith <chris@jalakai.co.uk> 2013-01-26 16:51:16 +0000
committer Christopher Smith <chris@jalakai.co.uk> 2013-01-26 16:51:16 +0000
commit fcfecb69832d5532b9c7d5362e4b7bb781c8fa11 (patch)
tree 3b33d6c76bc83088d8700c9cf88b33be07d9b959 /inc
parent dfbe4adfd080433f91409f028935b9f9879fceca (diff)
download rpg-fcfecb69832d5532b9c7d5362e4b7bb781c8fa11.tar.gz
rpg-fcfecb69832d5532b9c7d5362e4b7bb781c8fa11.tar.bz2
fix for FS#2676, inserting zero length spaces into long sequences of non-breaking characters in diffs
Diffstat (limited to 'inc')
-rw-r--r-- inc/html.php 31
1 files changed, 28 insertions, 3 deletions
diff --git a/inc/html.php b/inc/html.php
index 5c1c75cf6..89a8a4c7d 100644
--- a/inc/html.php
+++ b/inc/html.php
@@ -1154,8 +1154,7 @@ function html_diff($text='',$intro=true,$type=null){
list($l_head, $r_head, $l_minor, $r_minor) = html_diff_head($l_rev, $r_rev);
}
- $df = new Diff(explode("\n",htmlspecialchars($l_text)),
- explode("\n",htmlspecialchars($r_text)));
+ $df = new Diff(explode("\n",hsc($l_text)),explode("\n",hsc($r_text)));
if($type == 'inline'){
$tdf = new InlineDiffFormatter();
@@ -1205,12 +1204,38 @@ function html_diff($text='',$intro=true,$type=null){
<?php echo $r_head?>
</th>
</tr>
- <?php echo $tdf->format($df)?>
+ <?php echo html_insert_softbreaks($tdf->format($df)); ?>
</table>
</div>
<?php
}
+/**
+ * Insert soft line breaks into the HTML produced by a diff formatter
+ *
+ * The HTML is scanned for two kinds of tokens: complete HTML tags, which are
+ * passed through untouched, and runs of 12 or more bytes that contain no
+ * space, '<' or '>' (the pattern is not in UTF-8 mode, so the length is
+ * counted in bytes). Every token is handed to html_softbreak_callback().
+ *
+ * @param string $diffhtml HTML output of the diff formatter
+ * @return string the HTML with soft breaks inserted, or the unmodified input
+ *                when the regular expression engine reports an error
+ */
+function html_insert_softbreaks($diffhtml) {
+    // search the diff html string for both:
+    // - html tags, so these can be ignored
+    // - long strings of characters without breaking characters
+    $result = preg_replace_callback('/<[^>]*>|[^<> ]{12,}/','html_softbreak_callback',$diffhtml);
+
+    // preg_replace_callback() returns null on a PCRE error; showing the diff
+    // without soft breaks is better than showing nothing at all
+    return ($result === null) ? $diffhtml : $result;
+}
+
+/**
+ * Callback for html_insert_softbreaks(): make one long unbroken string breakable
+ *
+ * HTML tags are returned unchanged. In any other match a zero width space
+ * (U+200B, written as the entity &#8203;) is appended after each HTML entity
+ * and after each group of the characters ? / , & # ; :
+ *
+ * @param array $match match array supplied by preg_replace_callback();
+ *                     $match[0] holds the complete matched text
+ * @return string the text to put in place of the match
+ */
+function html_softbreak_callback($match){
+    // if match is an html tag, return it intact
+    // (uses [0]: the {0} string offset syntax is a fatal error as of PHP 8.0)
+    if ($match[0][0] === '<') return $match[0];
+
+    // it's a long string without a breaking character,
+    // make certain characters into breaking characters by inserting a
+    // breaking character (zero length space, U+200B / #8203) after them.
+    $regex = <<< REGEX
+(?(?= # start a conditional expression with a positive look ahead ...
+&(\#\\d{1,4}|[[:alpha:]]{1,4});) # ... for html entities - we don't want to split them
+&\#?\\w{1,4}; # yes pattern - a quicker match for the html entity, since we know we have one
+|
+[?/,&\#;:]+ # no pattern - any other group of 'special' characters to insert a breaking character after
+) # end conditional expression
+REGEX;
+
+    $broken = preg_replace('<'.$regex.'>xu','\0&#8203;',$match[0]);
+
+    // with the u modifier preg_replace() returns null for invalid UTF-8 (or
+    // any other PCRE error); keep the original text rather than dropping it
+    return ($broken === null) ? $match[0] : $broken;
+}
+
/**
* show warning on conflict detection
*