aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon McVittie <smcv@ http://smcv.pseudorandom.co.uk/>2008-11-16 18:11:39 +0000
committerSimon McVittie <smcv@ http://smcv.pseudorandom.co.uk/>2008-11-17 10:46:21 +0000
commite7a840ed9a817cf4db59c90e680afd89e146b581 (patch)
tree73b0d9e81f4bd9d7be9b65f42edbcd15ca847378
parent408d483dc2938af527100f2201ceea0efb5019af (diff)
downloadikiwiki-e7a840ed9a817cf4db59c90e680afd89e146b581.tar
ikiwiki-e7a840ed9a817cf4db59c90e680afd89e146b581.tar.gz
htmlbalance: new plugin that balances tags by parsing and re-serializing
-rw-r--r--IkiWiki/Plugin/htmlbalance.pm57
-rw-r--r--doc/plugins/aggregate.mdwn6
-rw-r--r--doc/plugins/htmlbalance.mdwn9
-rw-r--r--doc/plugins/htmltidy.mdwn3
-rwxr-xr-xt/htmlbalance.t13
5 files changed, 84 insertions, 4 deletions
diff --git a/IkiWiki/Plugin/htmlbalance.pm b/IkiWiki/Plugin/htmlbalance.pm
new file mode 100644
index 000000000..667d73b6c
--- /dev/null
+++ b/IkiWiki/Plugin/htmlbalance.pm
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+package IkiWiki::Plugin::htmlbalance;
+
+# htmlbalance: Parse and re-serialize HTML to ensure balanced tags
+#
+# Copyright 2008 Simon McVittie <http://smcv.pseudorandom.co.uk/>
+# Licensed under the GNU GPL, version 2, or any later version published by the
+# Free Software Foundation
+
+use warnings;
+use strict;
+use IkiWiki 2.00;
+
+sub import { #{{{
+ # Register both callbacks under the "htmlbalance" id, getsetup first.
+ hook(type => $_->[0], id => "htmlbalance", call => $_->[1]) for [getsetup => \&getsetup], [sanitize => \&sanitize];
+} # }}}
+
+sub getsetup () { #{{{
+ # Describe the plugin itself: flagged safe, with rebuild left undefined.
+ my %plugin_info = (safe => 1, rebuild => undef);
+
+ # List return: the key "plugin" followed by a fresh hash reference.
+ return (plugin => \%plugin_info);
+} #}}}
+
+sub sanitize (@) { #{{{ returns $params{content} re-serialized with balanced tags
+ my %params=@_;
+ my $ret = '';
+ # "use" runs at compile time, so only a string eval can trap a missing module in $@.
+ eval q{
+ use HTML::TreeBuilder;
+ use XML::Atom::Util qw(encode_xml);
+ };
+
+ if ($@) {
+ error($@);
+ return $params{content}; # content passes through untouched if error() returns
+ }
+ # Parse, take the top-level nodes from disembowel(), and serialize them one by one.
+ my $tree = HTML::TreeBuilder->new_from_content($params{content});
+ my @nodes = $tree->disembowel();
+ foreach my $node (@nodes) {
+ if (ref $node) {
+ $ret .= $node->as_XML(); # element node: serialized as XML
+ chomp $ret; # drop the trailing newline left by as_XML()
+ $node->delete();
+ }
+ else {
+ $ret .= encode_xml($node); # plain text node: passed through encode_xml()
+ }
+ }
+ $tree->delete();
+ return $ret;
+} # }}}
+
+1
diff --git a/doc/plugins/aggregate.mdwn b/doc/plugins/aggregate.mdwn
index c40a6dc22..6fc87853b 100644
--- a/doc/plugins/aggregate.mdwn
+++ b/doc/plugins/aggregate.mdwn
@@ -9,9 +9,9 @@ New users of aggregate should enable the `aggregateinternal => 1` option in the
.setup file. If you don't do so, you will need to enable the [[html]] plugin
as well as aggregate itself, since feed entries will be stored as HTML.
-The [[meta]] and [[tag]] plugins are also recommended. The
-[[htmltidy]] plugin is suggested, since feeds can easily contain html
-problems, some of which tidy can fix.
+The [[meta]] and [[tag]] plugins are also recommended. Either the
+[[htmltidy]] or [[htmlbalance]] plugin is suggested, since feeds can easily
+contain html problems, some of which these plugins can fix.
You will need to run ikiwiki periodically from a cron job, passing it the
--aggregate parameter, to make it check for new posts. Here's an example
diff --git a/doc/plugins/htmlbalance.mdwn b/doc/plugins/htmlbalance.mdwn
new file mode 100644
index 000000000..7cdb1f950
--- /dev/null
+++ b/doc/plugins/htmlbalance.mdwn
@@ -0,0 +1,9 @@
+[[!template id=plugin name=htmlbalance author="Simon McVittie"]]
+[[!tag type/html]]
+
+This plugin ensures that the HTML emitted by ikiwiki contains well-balanced
+HTML tags, by parsing it with HTML::TreeBuilder and re-serializing it. This
+acts as a lighter-weight alternative to [[plugins/htmltidy]]; it doesn't
+ensure validity, but it does at least ensure that formatting from a
+blog post pulled in by \[[![[ikiwiki/directive/inline]]]] doesn't
+leak into the rest of the page.
diff --git a/doc/plugins/htmltidy.mdwn b/doc/plugins/htmltidy.mdwn
index f675a01ae..580e56f59 100644
--- a/doc/plugins/htmltidy.mdwn
+++ b/doc/plugins/htmltidy.mdwn
@@ -7,4 +7,5 @@ emitted by ikiwiki. Besides being nicely formatted, this helps ensure that
even if users enter suboptimal html, your wiki generates valid html.
Note that since tidy is an external program, that is run each time a page
-is built, this plugin will slow ikiwiki down somewhat.
+is built, this plugin will slow ikiwiki down somewhat. [[plugins/htmlbalance]]
+might provide a faster alternative.
diff --git a/t/htmlbalance.t b/t/htmlbalance.t
new file mode 100755
index 000000000..cd124e473
--- /dev/null
+++ b/t/htmlbalance.t
@@ -0,0 +1,13 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+use Test::More tests => 7;
+
+BEGIN { use_ok("IkiWiki::Plugin::htmlbalance"); }
+# Each case feeds malformed markup to sanitize() and pins the exact re-serialized output.
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "<br></br>"), "<br />", "empty element is self-closed");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "<div><p b=\"c\">hello world</div>"), "<div><p b=\"c\">hello world</p></div>", "unclosed <p> is closed before </div>");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "<a></a></a>"), "<a></a>", "stray closing tag is dropped");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "<b>foo <a</b>"), "<b>foo </b>", "unterminated open tag is dropped");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "<b> foo <a</a></b>"), "<b> foo </b>", "malformed tag pair is dropped, whitespace kept");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "a>"), "a&gt;", "bare > in text is escaped");