aboutsummaryrefslogtreecommitdiff
path: root/sqitch
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2023-03-09 08:29:39 +0000
committerChristopher Baines <mail@cbaines.net>2023-03-09 08:29:39 +0000
commite39c9da028a9a29a0212cdb0287b0046bb786c32 (patch)
tree27260624f5437663a87afeca1376a2c3463401fa /sqitch
parent55059558e7ba636fb46d84058b1d73a372ce0dd8 (diff)
downloaddata-service-e39c9da028a9a29a0212cdb0287b0046bb786c32.tar
data-service-e39c9da028a9a29a0212cdb0287b0046bb786c32.tar.gz
Store the distribution of derivations related to packages
This might be generally useful, but I've been looking at it as it offers a way to try and improve query performance when you want to select all the derivations related to the packages for a revision.

The data looks like this (for a specified system and target):

┌───────┬───────┐
│ level │ count │
├───────┼───────┤
│    15 │     2 │
│    14 │     3 │
│    13 │     3 │
│    12 │     3 │
│    11 │    14 │
│    10 │    25 │
│     9 │    44 │
│     8 │    91 │
│     7 │  1084 │
│     6 │   311 │
│     5 │   432 │
│     4 │   515 │
│     3 │   548 │
│     2 │  2201 │
│     1 │ 21162 │
│     0 │ 22310 │
└───────┴───────┘

Level 0 reflects the number of packages. Level 1 is similar, as you have all the derivations for the package origins. The remaining levels contain fewer packages, since it's mostly just derivations involved in bootstrapping.

When using a recursive CTE to collect all the derivations, PostgreSQL assumes that each derivation has the same number of inputs, and this leads to a large overestimation of the number of derivations per revision. This in turn can lead to PostgreSQL picking a slower way of running the query.

When it's known how many new derivations you should see at each level, it's possible to inform PostgreSQL of this by using LIMITs at various points in the query. This reassures the query planner that it's not going to be handling lots of rows and helps it make better decisions about how to execute the query.
Diffstat (limited to 'sqitch')
-rw-r--r--sqitch/deploy/guix_revision_package_derivation_distribution_counts.sql13
-rw-r--r--sqitch/revert/guix_revision_package_derivation_distribution_counts.sql7
-rw-r--r--sqitch/sqitch.plan1
-rw-r--r--sqitch/verify/guix_revision_package_derivation_distribution_counts.sql7
4 files changed, 28 insertions, 0 deletions
diff --git a/sqitch/deploy/guix_revision_package_derivation_distribution_counts.sql b/sqitch/deploy/guix_revision_package_derivation_distribution_counts.sql
new file mode 100644
index 0000000..58829c5
--- /dev/null
+++ b/sqitch/deploy/guix_revision_package_derivation_distribution_counts.sql
@@ -0,0 +1,13 @@
+-- Deploy guix-data-service:guix_revision_package_derivation_distribution_counts to pg
+
+BEGIN;
+-- Count table keyed by revision, system, target and level; NOTE(review): no PRIMARY KEY or UNIQUE constraint is declared.
+CREATE TABLE guix_revision_package_derivation_distribution_counts (
+ guix_revision_id integer NOT NULL REFERENCES guix_revisions (id),
+ system_id integer NOT NULL REFERENCES systems (id),
+ target varchar NOT NULL,
+ level integer NOT NULL, -- presumably 0 = the packages themselves, higher = further into their inputs; TODO confirm
+ distinct_derivations integer NOT NULL -- presumably the number of distinct derivations seen at this level; TODO confirm
+);
+
+COMMIT;
diff --git a/sqitch/revert/guix_revision_package_derivation_distribution_counts.sql b/sqitch/revert/guix_revision_package_derivation_distribution_counts.sql
new file mode 100644
index 0000000..7956b30
--- /dev/null
+++ b/sqitch/revert/guix_revision_package_derivation_distribution_counts.sql
@@ -0,0 +1,7 @@
+-- Revert guix-data-service:guix_revision_package_derivation_distribution_counts from pg
+
+BEGIN;
+
+-- Undo the matching deploy script by dropping the table it creates, so a later re-deploy can succeed.
+DROP TABLE guix_revision_package_derivation_distribution_counts;
+COMMIT;
diff --git a/sqitch/sqitch.plan b/sqitch/sqitch.plan
index 64b2189..caab662 100644
--- a/sqitch/sqitch.plan
+++ b/sqitch/sqitch.plan
@@ -93,3 +93,4 @@ blocked_builds 2022-11-07T11:27:28Z Chris <chris@felis> # Add blocked_builds
package_derivations_extended_statistics 2022-11-12T10:40:18Z Chris <chris@felis> # Add extended statistics on package_derivations
derivation_outputs_id_and_derivation_id_idx 2022-11-12T10:41:42Z Chris <chris@felis> # Add index on derivation_outputs id and derivation_id
blocked_builds_blocked_builds_blocked_derivation_output_details_set_id_2 2023-03-05T10:19:53Z Chris <chris@felis> # Add index on blocked_builds_blocked_derivation_output_details_set_id
+guix_revision_package_derivation_distribution_counts 2023-03-08T16:53:44Z Chris <chris@felis> # Add guix_revision_package_derivation_distribution_counts table
diff --git a/sqitch/verify/guix_revision_package_derivation_distribution_counts.sql b/sqitch/verify/guix_revision_package_derivation_distribution_counts.sql
new file mode 100644
index 0000000..1f7edd2
--- /dev/null
+++ b/sqitch/verify/guix_revision_package_derivation_distribution_counts.sql
@@ -0,0 +1,7 @@
+-- Verify guix-data-service:guix_revision_package_derivation_distribution_counts on pg
+
+BEGIN;
+-- Raises an error if the table or any listed column is missing; WHERE FALSE means no rows are read.
+SELECT guix_revision_id, system_id, target, level, distinct_derivations
+FROM guix_revision_package_derivation_distribution_counts WHERE FALSE;
+ROLLBACK;