aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joey Hess <joey@gnu.kitenet.net> 2009-10-08 20:27:56 -0400
committer Joey Hess <joey@gnu.kitenet.net> 2009-10-08 20:27:56 -0400
commit 3bb00d142a4d31316fc6f0eb1277aa2fafe4dbe9 (patch)
tree 6f17cac8538244cfd35ba403f8e5fe0e7401fc26
parent 4b8ca7cfc147b2016b17cc88a21052a7ee6d46fb (diff)
download ikiwiki-3bb00d142a4d31316fc6f0eb1277aa2fafe4dbe9.tar
ikiwiki-3bb00d142a4d31316fc6f0eb1277aa2fafe4dbe9.tar.gz
Optimize away most expensive file prune calls, when refreshing
Benchmarking refresh of a wiki with 25 thousand pages showed file_pruned() using most of the time. But, when refreshing, ikiwiki already knows about nearly all the files. So we can skip calling file_pruned() for those it knows about. While tricky to do, this sped up a refresh (that otherwise does no work) by 10-50%.
-rw-r--r-- IkiWiki.pm 17
-rw-r--r-- IkiWiki/Render.pm 71
-rw-r--r-- debian/changelog 2
3 files changed, 49 insertions, 41 deletions
diff --git a/IkiWiki.pm b/IkiWiki.pm
index 97d84c9de..d667e7e10 100644
--- a/IkiWiki.pm
+++ b/IkiWiki.pm
@@ -32,7 +32,6 @@ our $installdir='/usr'; # INSTALLDIR_AUTOREPLACE done by Makefile, DNE
use Memoize;
memoize("abs2rel");
memoize("pagespec_translate");
-memoize("file_pruned");
memoize("template_file");
sub getsetup () {
@@ -1770,14 +1769,18 @@ sub add_depends ($$) {
return 1;
}
-sub file_pruned ($$) {
- require File::Spec;
- my $file=File::Spec->canonpath(shift);
- my $base=File::Spec->canonpath(shift);
- $file =~ s#^\Q$base\E/+##;
+sub file_pruned ($;$) {
+ my $file=shift;
+ if (@_) {
+ require File::Spec;
+ $file=File::Spec->canonpath($file);
+ my $base=File::Spec->canonpath(shift);
+ return if $file eq $base;
+ $file =~ s#^\Q$base\E/+##;
+ }
my $regexp='('.join('|', @{$config{wiki_file_prune_regexps}}).')';
- return $file =~ m/$regexp/ && $file ne $base;
+ return $file =~ m/$regexp/;
}
sub define_gettext () {
diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm
index 246c2260d..a8236b954 100644
--- a/IkiWiki/Render.pm
+++ b/IkiWiki/Render.pm
@@ -279,24 +279,26 @@ sub find_src_files () {
find({
no_chdir => 1,
wanted => sub {
- $_=decode_utf8($_);
- if (file_pruned($_, $config{srcdir})) {
+ my $file=decode_utf8($_);
+ $file=~s/^\Q$config{srcdir}\E\/?//;
+ my $page = pagename($file);
+ if (! exists $pagesources{$page} &&
+ file_pruned($file)) {
$File::Find::prune=1;
+ return;
}
- elsif (! -l $_ && ! -d _) {
- my ($f)=/$config{wiki_file_regexp}/; # untaint
- if (! defined $f) {
- warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
- }
- else {
- $f=~s/^\Q$config{srcdir}\E\/?//;
- push @files, $f;
- my $pagename = pagename($f);
- if ($pages{$pagename}) {
- debug(sprintf(gettext("%s has multiple possible source pages"), $pagename));
- }
- $pages{$pagename}=1;
+ return if -l $_ || -d _ || ! length $file;
+
+ my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint
+ if (! defined $f) {
+ warn(sprintf(gettext("skipping bad filename %s"), $file)."\n");
+ }
+ else {
+ push @files, $f;
+ if ($pages{$page}) {
+ debug(sprintf(gettext("%s has multiple possible source pages"), $page));
}
+ $pages{$page}=1;
}
},
}, $config{srcdir});
@@ -304,27 +306,28 @@ sub find_src_files () {
find({
no_chdir => 1,
wanted => sub {
- $_=decode_utf8($_);
- if (file_pruned($_, $dir)) {
+ my $file=decode_utf8($_);
+ $file=~s/^\Q$dir\E\/?//;
+ my $page=pagename($file);
+ if (! exists $pagesources{$page} &&
+ file_pruned($file)) {
$File::Find::prune=1;
+ return;
}
- elsif (! -l $_ && ! -d _) {
- my ($f)=/$config{wiki_file_regexp}/; # untaint
- if (! defined $f) {
- warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
- }
- else {
- $f=~s/^\Q$dir\E\/?//;
- # avoid underlaydir
- # override attacks; see
- # security.mdwn
- if (! -l "$config{srcdir}/$f" &&
- ! -e _) {
- my $page=pagename($f);
- if (! $pages{$page}) {
- push @files, $f;
- $pages{$page}=1;
- }
+ return if -l $_ || -d _ || ! length $file;
+
+ my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint
+ if (! defined $f) {
+ warn(sprintf(gettext("skipping bad filename %s"), $file)."\n");
+ }
+ else {
+ # avoid underlaydir override
+ # attacks; see security.mdwn
+ if (! -l "$config{srcdir}/$f" &&
+ ! -e _) {
+ if (! $pages{$page}) {
+ push @files, $f;
+ $pages{$page}=1;
}
}
}
diff --git a/debian/changelog b/debian/changelog
index ca5409af7..6c4353065 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,8 @@ ikiwiki (3.14159266) UNRELEASED; urgency=low
* mirrorlist: Display nothing if list is empty.
* Fix a bug that could lead to duplicate links being recorded
for tags.
+ * Optimize away most expensive file prune calls, when refreshing,
+ by only checking new files.
-- Joey Hess <joeyh@debian.org> Sun, 27 Sep 2009 17:40:03 -0400