aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjoey <joey@0fa5a96a-9a0e-0410-b3b2-a0fd24251071>2006-08-03 21:50:47 +0000
committerjoey <joey@0fa5a96a-9a0e-0410-b3b2-a0fd24251071>2006-08-03 21:50:47 +0000
commite8b39b094116e8b50cf12fe56b9c6a04f05683e5 (patch)
tree515b58ce3cf340e0d7174402b26de29be1660218
parent714a5d6f7e22345a150f542745c15ce5d1027e96 (diff)
downloadikiwiki-e8b39b094116e8b50cf12fe56b9c6a04f05683e5.tar
ikiwiki-e8b39b094116e8b50cf12fe56b9c6a04f05683e5.tar.gz
* Try to handle relative links in aggregated feeds. However,
the current support is a crude hack due to limitations of XML::Feed: xml:base is not supported, neither is Content-Location. And of course, relative links in RSS feeds are ill-defined.
-rw-r--r--IkiWiki/Plugin/aggregate.pm63
-rw-r--r--debian/changelog9
-rw-r--r--doc/todo/aggregation.mdwn3
3 files changed, 67 insertions, 8 deletions
diff --git a/IkiWiki/Plugin/aggregate.pm b/IkiWiki/Plugin/aggregate.pm
index 16db42a8f..d6592fef3 100644
--- a/IkiWiki/Plugin/aggregate.pm
+++ b/IkiWiki/Plugin/aggregate.pm
@@ -6,6 +6,9 @@ use warnings;
use strict;
use IkiWiki;
use HTML::Entities;
+use HTML::Parser;
+use HTML::Tagset;
+use URI;
my %feeds;
my %guids;
@@ -283,15 +286,13 @@ sub add_page (@) { #{{{
# Create the page.
my $template=IkiWiki::template("aggregatepost.tmpl", blind_cache => 1);
- my $content=$params{content};
- $params{content}=~s/(?<!\\)\[\[/\\\[\[/g; # escape accidental wikilinks
- # and preprocessor stuff
$template->param(title => $params{title})
if defined $params{title} && length($params{title});
- $template->param(content => $params{content});
+ $template->param(content => htmlescape(htmlabs($params{content}, $feed->{feedurl})));
$template->param(url => $feed->{url});
$template->param(name => $feed->{name});
- $template->param(link => $params{link}) if defined $params{link};
+ $template->param(link => urlabs($params{link}, $feed->{feedurl}))
+ if defined $params{link};
if (ref $feed->{tags}) {
$template->param(tags => [map { tag => $_ }, @{$feed->{tags}}]);
}
@@ -303,6 +304,58 @@ sub add_page (@) { #{{{
utime $mtime, $mtime, pagefile($guid->{page}) if defined $mtime;
} #}}}
+sub htmlescape ($) { #{{{
+	# Neutralise accidental wikilinks and preprocessor directives in
+	# aggregated html: every "[[" that is not already preceded by a
+	# backslash gets one put in front of it. Works on a copy and
+	# returns it, so the caller's string is left alone.
+	my ($escaped)=@_;
+
+	$escaped=~s/(?<!\\)\[\[/\\\[\[/g;
+
+	return $escaped;
+} #}}}
+
+sub urlabs ($$) { #{{{
+	# Resolve $url against $urlbase via URI->new_abs and return the
+	# result in string form.
+	my ($url, $urlbase)=@_;
+
+	my $absolute=URI->new_abs($url, $urlbase);
+	return $absolute->as_string;
+} #}}}
+
+sub htmlabs ($$) { #{{{
+	# Convert links in html from relative to absolute.
+	#
+	# $html is run through HTML::Parser. For each start tag whose
+	# element is listed in %HTML::Tagset::linkElements, the values of
+	# the attributes listed there are resolved against $urlbase with
+	# urlabs() and written back double-quoted. All other parser events
+	# are appended to the result using the text the parser reported.
+	# Returns the rewritten html.
+	#
+	# Note that this is a heuristic, which is not specified by the rss
+	# spec and may not be right for all feeds. Also, see Debian
+	# bug #XXXX (TODO: fill in the bug number).
+	my $html=shift;
+	my $urlbase=shift;
+
+	my $ret="";
+	my $p = HTML::Parser->new(api_version => 3);
+	# Everything that is not a start tag is copied through verbatim.
+	$p->handler(default => sub { $ret.=join("", @_) }, "text");
+	$p->handler(start => sub {
+		my ($tagname, $pos, $text) = @_;
+		my $linkattrs = $HTML::Tagset::linkElements{$tagname};
+		if (ref $linkattrs) {
+			while (4 <= @$pos) {
+				# use attribute sets from right to left
+				# to avoid invalidating the offsets
+				# when replacing the values
+				my($k_offset, $k_len, $v_offset, $v_len) =
+					splice(@$pos, -4);
+				my $attrname = lc(substr($text, $k_offset, $k_len));
+				next unless grep { $_ eq $attrname } @$linkattrs;
+				next unless $v_offset; # 0 v_offset means no value
+				my $v = substr($text, $v_offset, $v_len);
+				# Strip the surrounding quotes. /s together
+				# with \A and \z makes this work for values
+				# that contain a newline too; otherwise the
+				# quotes would be handed to urlabs() as part
+				# of the url.
+				$v =~ s/\A(['"])(.*)\1\z/$2/s;
+				my $new_v=urlabs($v, $urlbase);
+				$new_v =~ s/\"/&quot;/g; # since we quote with ""
+				substr($text, $v_offset, $v_len) = qq("$new_v");
+			}
+		}
+		$ret.=$text;
+	}, "tagname, tokenpos, text");
+	$p->parse($html);
+	$p->eof;
+
+	return $ret;
+} #}}}
+
sub remove_feeds () { #{{{
my $page=shift;
diff --git a/debian/changelog b/debian/changelog
index aacdbe52f..ab053e496 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,12 @@
+ikiwiki (1.16) UNRELEASED; urgency=low
+
+ * Try to handle relative links in aggregated feeds. However,
+ the current support is a crude hack due to limitations of XML::Feed:
+ xml:base is not supported, neither is Content-Location. And of course,
+ relative links in RSS feeds are ill-defined.
+
+ -- Joey Hess <joeyh@debian.org> Thu, 3 Aug 2006 17:29:51 -0400
+
ikiwiki (1.15) unstable; urgency=low
* Remove CDPATH and other env vars perl taint checking doesn't like.
diff --git a/doc/todo/aggregation.mdwn b/doc/todo/aggregation.mdwn
index 5abb6a53e..dec242ea6 100644
--- a/doc/todo/aggregation.mdwn
+++ b/doc/todo/aggregation.mdwn
@@ -2,6 +2,3 @@
* Need to store page author metadata and include it in the rss feed.
Permalink to? Also, that stuff could be presented better in the html blog
view, also using the metadata.
-* Some rss feeds contain relative links or relative urls to inline images,
- which break when aggregated. Do I need to parse the html and make them
- all absolute?