From e7a840ed9a817cf4db59c90e680afd89e146b581 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Sun, 16 Nov 2008 18:11:39 +0000 Subject: htmlbalance: new plugin that balances tags by parsing and re-serializing --- IkiWiki/Plugin/htmlbalance.pm | 57 +++++++++++++++++++++++++++++++++++++++++++ doc/plugins/aggregate.mdwn | 6 ++--- doc/plugins/htmlbalance.mdwn | 9 +++++++ doc/plugins/htmltidy.mdwn | 3 ++- t/htmlbalance.t | 13 ++++++++++ 5 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 IkiWiki/Plugin/htmlbalance.pm create mode 100644 doc/plugins/htmlbalance.mdwn create mode 100755 t/htmlbalance.t diff --git a/IkiWiki/Plugin/htmlbalance.pm b/IkiWiki/Plugin/htmlbalance.pm new file mode 100644 index 000000000..667d73b6c --- /dev/null +++ b/IkiWiki/Plugin/htmlbalance.pm @@ -0,0 +1,57 @@ +#!/usr/bin/perl +package IkiWiki::Plugin::htmlbalance; + +# htmlbalance: Parse and re-serialize HTML to ensure balanced tags +# +# Copyright 2008 Simon McVittie +# Licensed under the GNU GPL, version 2, or any later version published by the +# Free Software Foundation + +use warnings; +use strict; +use IkiWiki 2.00; + +sub import { #{{{ + hook(type => "getsetup", id => "htmlbalance", call => \&getsetup); + hook(type => "sanitize", id => "htmlbalance", call => \&sanitize); +} # }}} + +sub getsetup () { #{{{ + return + plugin => { + safe => 1, + rebuild => undef, + }, +} #}}} + +sub sanitize (@) { #{{{ + my %params=@_; + my $ret = ''; + + eval { + use HTML::TreeBuilder; + use XML::Atom::Util qw(encode_xml); + }; + + if ($@) { + error($@); + return $params{content}; + } + + my $tree = HTML::TreeBuilder->new_from_content($params{content}); + my @nodes = $tree->disembowel(); + foreach my $node (@nodes) { + if (ref $node) { + $ret .= $node->as_XML(); + chomp $ret; + $node->delete(); + } + else { + $ret .= encode_xml($node); + } + } + $tree->delete(); + return $ret; +} # }}} + +1 diff --git a/doc/plugins/aggregate.mdwn b/doc/plugins/aggregate.mdwn index c40a6dc22..6fc87853b 100644 --- a/doc/plugins/aggregate.mdwn +++ b/doc/plugins/aggregate.mdwn @@ -9,9 +9,9 @@ New users of aggregate should enable the `aggregateinternal => 1` option in the .setup file. If you don't do so, you will need to enable the [[html]] plugin as well as aggregate itself, since feed entries will be stored as HTML. -The [[meta]] and [[tag]] plugins are also recommended. The -[[htmltidy]] plugin is suggested, since feeds can easily contain html -problems, some of which tidy can fix. +The [[meta]] and [[tag]] plugins are also recommended. Either the +[[htmltidy]] or [[htmlbalance]] plugin is suggested, since feeds can easily +contain html problems, some of which these plugins can fix. You will need to run ikiwiki periodically from a cron job, passing it the --aggregate parameter, to make it check for new posts. Here's an example diff --git a/doc/plugins/htmlbalance.mdwn b/doc/plugins/htmlbalance.mdwn new file mode 100644 index 000000000..7cdb1f950 --- /dev/null +++ b/doc/plugins/htmlbalance.mdwn @@ -0,0 +1,9 @@ +[[!template id=plugin name=htmlbalance author="Simon McVittie"]] +[[!tag type/html]] + +This plugin ensures that the HTML emitted by ikiwiki contains well-balanced +HTML tags, by parsing it with HTML::TreeBuilder and re-serializing it. This +acts as a lighter-weight alternative to [[plugins/htmltidy]]; it doesn't +ensure validity, but it does at least ensure that formatting from a +blog post pulled in by \[[![[ikiwiki/directive/inline]]]] doesn't +leak into the rest of the page. diff --git a/doc/plugins/htmltidy.mdwn b/doc/plugins/htmltidy.mdwn index f675a01ae..580e56f59 100644 --- a/doc/plugins/htmltidy.mdwn +++ b/doc/plugins/htmltidy.mdwn @@ -7,4 +7,5 @@ emitted by ikiwiki. Besides being nicely formatted, this helps ensure that even if users enter suboptimal html, your wiki generates valid html. Note that since tidy is an external program, that is run each time a page -is built, this plugin will slow ikiwiki down somewhat. +is built, this plugin will slow ikiwiki down somewhat. [[plugins/htmlbalance]] +might provide a faster alternative. diff --git a/t/htmlbalance.t b/t/htmlbalance.t new file mode 100755 index 000000000..cd124e473 --- /dev/null +++ b/t/htmlbalance.t @@ -0,0 +1,13 @@ +#!/usr/bin/perl +use warnings; +use strict; +use Test::More tests => 7; + +BEGIN { use_ok("IkiWiki::Plugin::htmlbalance"); } + +is(IkiWiki::Plugin::htmlbalance::sanitize(content => "

"), "
"); +is(IkiWiki::Plugin::htmlbalance::sanitize(content => "

hello world

"), "

hello world

"); +is(IkiWiki::Plugin::htmlbalance::sanitize(content => ""), ""); +is(IkiWiki::Plugin::htmlbalance::sanitize(content => "foo "), "foo "); +is(IkiWiki::Plugin::htmlbalance::sanitize(content => " foo "), " foo "); +is(IkiWiki::Plugin::htmlbalance::sanitize(content => "a>"), "a>"); -- cgit v1.2.3