From 0bfc364a7df124509855b8ed0b1b33ab5bc9ebbb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 11 Apr 2010 01:30:03 -0400 Subject: optimization: pagespec_match_list with no num limit matches before sorting This can be a lot faster, since huge numbers of pages are not sorted only to mostly be thrown away. It sped up a build of my blog by at least 5 minutes. --- IkiWiki.pm | 38 ++++++++++++++++++++++++++------------ doc/todo/smarter_sorting.mdwn | 3 +++ t/pagespec_match_list.t | 6 +++++- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/IkiWiki.pm b/IkiWiki.pm index 2cad6a3ef..74d452c50 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -1951,8 +1951,9 @@ sub add_link ($$;$) { } } -sub sortspec_translate ($) { +sub sortspec_translate ($$) { my $spec = shift; + my $reverse = shift; my $code = ""; my @data; @@ -2007,6 +2008,10 @@ sub sortspec_translate ($) { return sub { 0 }; } + if ($reverse) { + $code="-($code)"; + } + no warnings; return eval 'sub { '.$code.' }'; } @@ -2109,18 +2114,20 @@ sub pagespec_match_list ($$;@) { ? grep { ! $params{filter}->($_) } keys %pagesources : keys %pagesources; } - - if (defined $params{sort}) { - @candidates = IkiWiki::SortSpec::sort_pages($params{sort}, - @candidates); + + my $num=$params{num}; + my $sort=$params{sort}; + my $reverse=$params{reverse}; + # when only the top matches will be returned, it's efficient to + # sort before matching to pagespec, + if (defined $num && defined $sort) { + @candidates=IkiWiki::SortSpec::sort_pages( + $sort, $reverse, @candidates); } - - @candidates=reverse(@candidates) if $params{reverse}; $depends{$page}{$pagespec} |= ($params{deptype} || $DEPEND_CONTENT); # clear params, remainder is passed to pagespec - my $num=$params{num}; delete @params{qw{num deptype reverse sort filter list}}; my @matches; @@ -2144,7 +2151,15 @@ sub pagespec_match_list ($$;@) { $depends_simple{$page}{lc $k} |= $i->{$k}; } - return @matches; + # when all matches will be returned, it's efficient to + # sort after matching + if (! defined $num && defined $sort) { + return IkiWiki::SortSpec::sort_pages( + $sort, $reverse, @matches); + } + else { + return @matches; + } } sub pagespec_valid ($) { @@ -2437,9 +2452,8 @@ package IkiWiki::SortSpec; # This is in the SortSpec namespace so that the $a and $b that sort() uses # are easily available in this namespace, for cmp functions to use them. sub sort_pages { - my $f = IkiWiki::sortspec_translate(shift); - - return sort $f @_; + my $f=IkiWiki::sortspec_translate(shift, shift); + sort $f @_ } sub cmp_title { diff --git a/doc/todo/smarter_sorting.mdwn b/doc/todo/smarter_sorting.mdwn index 5a6d63ef1..f8ac216dc 100644 --- a/doc/todo/smarter_sorting.mdwn +++ b/doc/todo/smarter_sorting.mdwn @@ -29,5 +29,8 @@ date range.) Well, in this case, it *does* make sense to flip it, limit by pagespe first, and do a (quick)sort second. (No influence complications, either.) +> Flipping when there's no limit implemented, and it knocked 1/3 off +> the rebuild time of my blog's archive pages. --[[Joey]] + Adding these special cases will be more complicated, but I think the best of both worlds. --[[Joey]] diff --git a/t/pagespec_match_list.t b/t/pagespec_match_list.t index 2ad7a9105..c7688c6c0 100755 --- a/t/pagespec_match_list.t +++ b/t/pagespec_match_list.t @@ -1,7 +1,7 @@ #!/usr/bin/perl use warnings; use strict; -use Test::More tests => 90; +use Test::More tests => 92; BEGIN { use_ok("IkiWiki"); } @@ -38,12 +38,16 @@ $links{foo3}=[qw{bar}]; is_deeply([pagespec_match_list("foo", "bar")], ["bar"]); is_deeply([sort(pagespec_match_list("foo", "* and !post/*"))], ["bar", "foo", "foo2", "foo3"]); is_deeply([sort(pagespec_match_list("foo", "post/*"))], ["post/1", "post/2", "post/3"]); +is_deeply([pagespec_match_list("foo", "post/*", sort => "title")], + ["post/1", "post/2", "post/3"]); is_deeply([pagespec_match_list("foo", "post/*", sort => "title", reverse => 1)], ["post/3", "post/2", "post/1"]); is_deeply([pagespec_match_list("foo", "post/*", sort => "title", num => 2)], ["post/1", "post/2"]); is_deeply([pagespec_match_list("foo", "post/*", sort => "title", num => 50)], ["post/1", "post/2", "post/3"]); +is_deeply([pagespec_match_list("foo", "post/*", sort => "title", num => 50, reverse => 1)], + ["post/3", "post/2", "post/1"]); is_deeply([pagespec_match_list("foo", "post/*", sort => "title", filter => sub { $_[0] =~ /3/}) ], ["post/1", "post/2"]); -- cgit v1.2.3