diff options
author | Joey Hess <joey@gnu.kitenet.net> | 2009-10-08 20:27:56 -0400 |
---|---|---|
committer | Joey Hess <joey@gnu.kitenet.net> | 2009-10-08 20:27:56 -0400 |
commit | 3bb00d142a4d31316fc6f0eb1277aa2fafe4dbe9 (patch) | |
tree | 6f17cac8538244cfd35ba403f8e5fe0e7401fc26 | |
parent | 4b8ca7cfc147b2016b17cc88a21052a7ee6d46fb (diff) | |
download | ikiwiki-3bb00d142a4d31316fc6f0eb1277aa2fafe4dbe9.tar ikiwiki-3bb00d142a4d31316fc6f0eb1277aa2fafe4dbe9.tar.gz |
Optimize away most expensive file prune calls, when refreshing
Benchmarking refresh of a wiki with 25 thousand pages showed
file_pruned() using most of the time. But, when refreshing, ikiwiki already
knows about nearly all the files. So we can skip calling file_pruned() for
those it knows about. While tricky to do, this sped up a refresh (that
otherwise does no work) by 10-50%.
-rw-r--r-- | IkiWiki.pm | 17 | ||||
-rw-r--r-- | IkiWiki/Render.pm | 71 | ||||
-rw-r--r-- | debian/changelog | 2 |
3 files changed, 49 insertions, 41 deletions
diff --git a/IkiWiki.pm b/IkiWiki.pm index 97d84c9de..d667e7e10 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -32,7 +32,6 @@ our $installdir='/usr'; # INSTALLDIR_AUTOREPLACE done by Makefile, DNE use Memoize; memoize("abs2rel"); memoize("pagespec_translate"); -memoize("file_pruned"); memoize("template_file"); sub getsetup () { @@ -1770,14 +1769,18 @@ sub add_depends ($$) { return 1; } -sub file_pruned ($$) { - require File::Spec; - my $file=File::Spec->canonpath(shift); - my $base=File::Spec->canonpath(shift); - $file =~ s#^\Q$base\E/+##; +sub file_pruned ($;$) { + my $file=shift; + if (@_) { + require File::Spec; + $file=File::Spec->canonpath($file); + my $base=File::Spec->canonpath(shift); + return if $file eq $base; + $file =~ s#^\Q$base\E/+##; + } my $regexp='('.join('|', @{$config{wiki_file_prune_regexps}}).')'; - return $file =~ m/$regexp/ && $file ne $base; + return $file =~ m/$regexp/; } sub define_gettext () { diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index 246c2260d..a8236b954 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -279,24 +279,26 @@ sub find_src_files () { find({ no_chdir => 1, wanted => sub { - $_=decode_utf8($_); - if (file_pruned($_, $config{srcdir})) { + my $file=decode_utf8($_); + $file=~s/^\Q$config{srcdir}\E\/?//; + my $page = pagename($file); + if (! exists $pagesources{$page} && + file_pruned($file)) { $File::Find::prune=1; + return; } - elsif (! -l $_ && ! -d _) { - my ($f)=/$config{wiki_file_regexp}/; # untaint - if (! defined $f) { - warn(sprintf(gettext("skipping bad filename %s"), $_)."\n"); - } - else { - $f=~s/^\Q$config{srcdir}\E\/?//; - push @files, $f; - my $pagename = pagename($f); - if ($pages{$pagename}) { - debug(sprintf(gettext("%s has multiple possible source pages"), $pagename)); - } - $pages{$pagename}=1; + return if -l $_ || -d _ || ! length $file; + + my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint + if (! 
defined $f) { + warn(sprintf(gettext("skipping bad filename %s"), $file)."\n"); + } + else { + push @files, $f; + if ($pages{$page}) { + debug(sprintf(gettext("%s has multiple possible source pages"), $page)); } + $pages{$page}=1; } }, }, $config{srcdir}); @@ -304,27 +306,28 @@ sub find_src_files () { find({ no_chdir => 1, wanted => sub { - $_=decode_utf8($_); - if (file_pruned($_, $dir)) { + my $file=decode_utf8($_); + $file=~s/^\Q$dir\E\/?//; + my $page=pagename($file); + if (! exists $pagesources{$page} && + file_pruned($file)) { $File::Find::prune=1; + return; } - elsif (! -l $_ && ! -d _) { - my ($f)=/$config{wiki_file_regexp}/; # untaint - if (! defined $f) { - warn(sprintf(gettext("skipping bad filename %s"), $_)."\n"); - } - else { - $f=~s/^\Q$dir\E\/?//; - # avoid underlaydir - # override attacks; see - # security.mdwn - if (! -l "$config{srcdir}/$f" && - ! -e _) { - my $page=pagename($f); - if (! $pages{$page}) { - push @files, $f; - $pages{$page}=1; - } + return if -l $_ || -d _ || ! length $file; + + my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint + if (! defined $f) { + warn(sprintf(gettext("skipping bad filename %s"), $file)."\n"); + } + else { + # avoid underlaydir override + # attacks; see security.mdwn + if (! -l "$config{srcdir}/$f" && + ! -e _) { + if (! $pages{$page}) { + push @files, $f; + $pages{$page}=1; } } } diff --git a/debian/changelog b/debian/changelog index ca5409af7..6c4353065 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,8 @@ ikiwiki (3.14159266) UNRELEASED; urgency=low * mirrorlist: Display nothing if list is empty. * Fix a bug that could lead to duplicate links being recorded for tags. + * Optimize away most expensive file prune calls, when refreshing, + by only checking new files. -- Joey Hess <joeyh@debian.org> Sun, 27 Sep 2009 17:40:03 -0400 |