;;; Guix Data Service -- Information about Guix over time ;;; Copyright © 2020 Christopher Baines ;;; ;;; This program is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU Affero General Public License ;;; as published by the Free Software Foundation, either version 3 of ;;; the License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public ;;; License along with this program. If not, see ;;; . (define-module (guix-data-service data-deletion) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (ice-9 match) #:use-module (ice-9 threads) #:use-module (squee) #:use-module (fibers) #:use-module (fibers channels) #:use-module (guix-data-service utils) #:use-module (guix-data-service database) #:use-module (guix-data-service model git-branch) #:use-module (guix-data-service model package-derivation-by-guix-revision-range) #:export (delete-guix-revisions delete-data-for-branch delete-revisions-from-branch-except-most-recent-n delete-revisions-for-all-branches-except-most-recent-n delete-data-for-all-deleted-branches delete-unreferenced-derivations delete-nars-for-unknown-store-paths)) (define (delete-guix-revisions conn git-repository-id commits) (define (delete-unreferenced-package-derivations) (exec-query conn " DELETE FROM package_derivations WHERE NOT EXISTS ( SELECT 1 FROM guix_revision_package_derivations WHERE guix_revision_package_derivations.package_derivation_id = package_derivations.id )")) (define (delete-unreferenced-lint-warnings) (exec-query conn " DELETE FROM lint_warnings WHERE NOT EXISTS ( SELECT 1 FROM guix_revision_lint_warnings WHERE guix_revision_lint_warnings.lint_warning_id = lint_warnings.id )")) (define (delete-unreferenced-lint-checkers) (exec-query conn " DELETE FROM lint_checkers WHERE NOT EXISTS ( SELECT 1 FROM guix_revision_lint_checkers WHERE guix_revision_lint_checkers.lint_checker_id = lint_checkers.id )")) (let ((guix-revision-ids (map car (exec-query conn (string-append " SELECT guix_revisions.id FROM (VALUES " (string-join (map (lambda (commit) (string-append "('" commit "')")) commits) ", ") ") AS commits INNER JOIN guix_revisions ON guix_revisions.commit = commits.column1 WHERE guix_revisions.git_repository_id = " (number->string git-repository-id) " AND commits.column1 NOT IN ( SELECT commit FROM git_commits )"))))) (unless (null? guix-revision-ids) (for-each (lambda (table) (exec-query conn (simple-format #f " DELETE FROM ~A WHERE ~A IN (VALUES ~A)" table (if (string=? table "guix_revision_package_derivations") "revision_id" "guix_revision_id") (string-join (map (lambda (guix-revision-id) (string-append "(" guix-revision-id ")")) guix-revision-ids) ", ")))) '("channel_instances" "guix_revision_channel_news_entries" "guix_revision_lint_checkers" "guix_revision_lint_warnings" "guix_revision_package_derivations" "guix_revision_system_test_derivations" "guix_revision_package_derivation_distribution_counts")) (exec-query conn (string-append " DELETE FROM guix_revisions WHERE id IN (" (string-join guix-revision-ids ", ") ") AND id NOT IN ( SELECT guix_revisions.id FROM guix_revisions INNER JOIN git_branches ON git_branches.git_repository_id = guix_revisions.git_repository_id INNER JOIN git_commits ON git_commits.git_branch_id = git_branches.id AND git_commits.commit = guix_revisions.commit )")) (delete-unreferenced-package-derivations) (delete-unreferenced-lint-warnings) (delete-unreferenced-lint-checkers)))) (define (delete-revisions-from-branch conn git-repository-id branch-name commits) (define (delete-jobs conn) (for-each (lambda (table) (exec-query conn (string-append " DELETE FROM " table " WHERE job_id IN ( SELECT id FROM load_new_guix_revision_jobs WHERE git_repository_id = " (number->string git-repository-id) " AND commit IN (" (string-join (map (lambda (commit) (string-append "'" commit "'")) commits) ", ") ") )"))) '("load_new_guix_revision_job_events" "load_new_guix_revision_job_logs")) (exec-query conn (string-append " DELETE FROM load_new_guix_revision_jobs WHERE git_repository_id = " (number->string git-repository-id) " AND commit IN (" (string-join (map (lambda (commit) (string-append "'" commit "'")) commits) ", ") ")"))) (define (delete-from-git-commits conn) (exec-query conn (simple-format #f " DELETE FROM git_commits WHERE id IN ( SELECT git_commits.id FROM git_commits INNER JOIN git_branches ON git_branches.id = git_commits.git_branch_id WHERE git_branches.git_repository_id = ~A AND git_branches.name = '~A' AND git_commits.commit IN (~A) )" git-repository-id branch-name (string-join (map (lambda (commit) (string-append "'" commit "'")) commits) ", ")))) (catch 'psql-query-error (lambda () (with-postgresql-transaction conn (lambda (conn) (obtain-advisory-transaction-lock conn 'delete-revisions-from-branch) (exec-query conn "SET LOCAL lock_timeout = '5s';") (delete-from-git-commits conn) (delete-jobs conn) (let ((git-branch-id (git-branch-for-repository-and-name conn git-repository-id branch-name))) (exec-query conn (string-append " DROP TABLE IF EXISTS package_derivations_by_guix_revision_range_git_branch_" (number->string git-branch-id) ";"))) (delete-guix-revisions conn git-repository-id commits)))) (lambda (key . args) (simple-format (current-error-port) "error when attempting to delete revisions from branch: ~A ~A\n" key args) (apply throw key args)))) (define (delete-data-for-branch conn git-repository-id branch-name) (define commits (map car (exec-query conn " SELECT git_commits.commit FROM git_branches INNER JOIN git_commits ON git_branches.id = git_commits.git_branch_id WHERE git_repository_id = $1 AND git_branches.name = $2" (list (number->string git-repository-id) branch-name)))) (delete-revisions-from-branch conn git-repository-id branch-name commits) (exec-query conn " DELETE FROM git_branches WHERE name = $1 AND git_repository_id = $2" (list branch-name (number->string git-repository-id)))) (define (delete-revisions-from-branch-except-most-recent-n conn git-repository-id branch-name n) (define commits (map car (exec-query conn " SELECT commit FROM git_commits INNER JOIN git_branches ON git_branches.id = git_commits.git_branch_id WHERE git_repository_id = $1 AND name = $2 ORDER BY datetime DESC OFFSET $3" (list (number->string git-repository-id) branch-name (number->string n))))) (unless (null? commits) (simple-format #t "deleting ~A commits from ~A\n" (length commits) branch-name) (delete-revisions-from-branch conn git-repository-id branch-name commits) (simple-format #t "repopulating package_derivations_by_guix_revision_range\n") (insert-guix-revision-package-derivation-entries conn (number->string git-repository-id) branch-name))) (define (delete-revisions-for-all-branches-except-most-recent-n n) (with-postgresql-connection "data-deletion" (lambda (conn) (for-each (match-lambda ((git-repository-id branch-name) (delete-revisions-from-branch-except-most-recent-n conn (string->number git-repository-id) branch-name n))) (exec-query conn " SELECT DISTINCT git_repository_id, name FROM git_branches"))))) (define (delete-data-for-all-branches-but-master) (with-postgresql-connection "data-deletion" (lambda (conn) (for-each (lambda (branch-name) (delete-data-for-branch conn 1 branch-name)) (map car (exec-query conn " SELECT DISTINCT name FROM git_branches WHERE git_repository_id = 1 AND name != 'master'")))))) (define (delete-data-for-all-deleted-branches) (with-postgresql-connection "data-deletion" (lambda (conn) (for-each (match-lambda ((name git-repository-id) (simple-format #t "deleting data for ~A (~A)\n" name git-repository-id) (delete-data-for-branch conn (string->number git-repository-id) name))) (exec-query conn " SELECT name, git_repository_id FROM ( SELECT DISTINCT ON (name, git_repository_id) name, git_repository_id, commit FROM git_branches INNER JOIN git_commits ON git_commits.git_branch_id = git_branches.id ORDER BY git_repository_id, name, datetime DESC ) AS git_branches_latest_revision WHERE commit = ''"))))) (define* (delete-unreferenced-derivations #:key (batch-size 100000)) (define (delete-builds-for-derivation-output-details-set conn derivation-output-details-set-id) (let ((build-ids (map car (exec-query conn " SELECT id FROM builds WHERE derivation_output_details_set_id = $1" (list derivation-output-details-set-id))))) (unless (null? build-ids) (exec-query conn (string-append " DELETE FROM build_status WHERE build_id IN (" (string-join build-ids ",") ")")) (exec-query conn (string-append " DELETE FROM latest_build_status WHERE build_id IN (" (string-join build-ids ",") ")")) (exec-query conn (string-append " DELETE FROM builds WHERE id IN (" (string-join build-ids ",") ")"))))) (define (delete-blocked-builds-for-derivation-output-details-set conn derivation-output-details-set-id) ;; Do this for each build server individually, as that helps PostgreSQL ;; efficiently check the partitions (let ((build-server-ids (map car (exec-query conn "SELECT id FROM build_servers")))) (for-each (lambda (build-server-id) (exec-query conn " DELETE FROM blocked_builds WHERE build_server_id = $1 AND ( blocked_derivation_output_details_set_id = $2 OR blocking_derivation_output_details_set_id = $3 )" (list build-server-id derivation-output-details-set-id derivation-output-details-set-id))) build-server-ids))) (define (delete-unreferenced-derivations-source-files conn) (define (delete-batch) (exec-query conn " DELETE FROM derivation_source_files WHERE id IN ( SELECT id FROM derivation_source_files WHERE NOT EXISTS ( SELECT 1 FROM derivation_sources WHERE derivation_source_file_id = derivation_source_files.id ) LIMIT 100 ) RETURNING id")) (while (not (null? (delete-batch))) #t)) (define (maybe-delete-derivation conn id) (match (map car (exec-query conn " DELETE FROM derivation_outputs WHERE derivation_id = $1 AND NOT EXISTS ( SELECT 1 FROM derivation_inputs WHERE derivation_output_id IN ( SELECT derivation_outputs.id FROM derivation_outputs WHERE derivation_id = $1 ) ) RETURNING derivation_outputs.derivation_output_details_id" (list id))) (() 0) ((derivation-output-details-ids ...) (for-each (lambda (derivation-output-details-id) (unless (string->number derivation-output-details-id) (error (simple-format #f "derivation-output-details-id: ~A is not a number" derivation-output-details-id))) (match (exec-query conn " SELECT COUNT(*) FROM derivation_outputs WHERE derivation_output_details_id = $1" (list derivation-output-details-id)) (((count)) (when (eq? (string->number count) 0) (exec-query conn " DELETE FROM derivation_output_details WHERE id = $1" (list derivation-output-details-id)))))) derivation-output-details-ids) (exec-query conn " DELETE FROM derivation_sources WHERE derivation_id = $1" (list id)) (match (exec-query conn " SELECT derivation_output_details_set_id FROM derivations_by_output_details_set WHERE derivation_id = $1" (list id)) (((derivation-output-details-set-id)) (match (exec-query conn " SELECT COUNT(*) FROM derivations_by_output_details_set WHERE derivation_output_details_set_id = $1" (list derivation-output-details-set-id)) (((count)) (exec-query conn " DELETE FROM derivations_by_output_details_set WHERE derivation_id = $1" (list id)) (when (<= (string->number count) 1) (delete-builds-for-derivation-output-details-set conn derivation-output-details-set-id) (delete-blocked-builds-for-derivation-output-details-set conn derivation-output-details-set-id) (exec-query conn " DELETE FROM derivation_output_details_sets WHERE id = $1" (list derivation-output-details-set-id))))))) (exec-query conn " DELETE FROM derivation_inputs WHERE derivation_id = $1" (list id)) (exec-query conn " DELETE FROM derivations WHERE id = $1" (list id)) 1))) (define deleted-count 0) (define channel (make-channel)) (define (delete-batch conn) (let* ((derivations (with-time-logging "fetching batch of derivations" (map car (exec-query conn " SELECT DISTINCT derivation_id FROM derivation_outputs WHERE NOT EXISTS ( -- This isn't a perfect check, as this will select some derivations that are -- used, but maybe-delete-derivation includes the proper check SELECT 1 FROM derivation_inputs WHERE derivation_output_id = derivation_outputs.id ) AND NOT EXISTS ( SELECT 1 FROM package_derivations WHERE package_derivations.derivation_id = derivation_outputs.derivation_id ) AND NOT EXISTS ( SELECT 1 FROM channel_instances WHERE derivation_id = derivation_outputs.derivation_id ) AND NOT EXISTS ( SELECT 1 FROM guix_revision_system_test_derivations WHERE derivation_id = derivation_outputs.derivation_id ) LIMIT $1" (list (number->string batch-size)))))) (derivations-count (length derivations))) (with-time-logging (simple-format #f "Looking at ~A derivations" derivations-count) (set! deleted-count 0) (for-each (lambda (derivation-id) (put-message channel derivation-id)) derivations)) (simple-format (current-error-port) "Deleted ~A derivations\n" deleted-count) deleted-count)) (run-fibers (lambda () ;; First spawn some fibers to delete the derivations (for-each (lambda _ (spawn-fiber (lambda () (with-postgresql-connection "data-deletion" (lambda (conn) (let loop ((derivation-id (get-message channel))) (unless (string->number derivation-id) (error (simple-format #f "derivation-id: ~A is not a number" derivation-id))) (let ((val (catch 'psql-query-error (lambda () (with-postgresql-transaction conn (lambda (conn) (exec-query conn " SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED") (exec-query conn "SET LOCAL lock_timeout = '5s';") (maybe-delete-derivation conn derivation-id)))) (lambda (key . args) (simple-format (current-error-port) "error when attempting to delete derivation: ~A ~A\n" key args) 0)))) ;; This is safe as all fibers are in the same ;; thread and cooperative. (set! deleted-count (+ val deleted-count))) (loop (get-message channel)))))))) (iota 12)) (with-postgresql-connection "data-deletion" (lambda (conn) (obtain-advisory-transaction-lock conn 'delete-unreferenced-derivations) (let loop ((total-deleted 0)) (let ((batch-deleted-count (delete-batch conn))) (if (eq? 0 batch-deleted-count) (begin (with-time-logging "Deleting unused derivation_source_files entries" (delete-unreferenced-derivations-source-files conn)) (simple-format (current-output-port) "Finished deleting derivations, deleted ~A in total\n" total-deleted)) (loop (+ total-deleted batch-deleted-count)))))))) #:hz 0 #:parallelism 1)) (define (delete-nars-for-unknown-store-paths) (define (get-nar-ids-batch conn) (map car (exec-query conn " SELECT id FROM nars WHERE NOT EXISTS ( SELECT 1 FROM derivation_output_details WHERE derivation_output_details.path = nars.store_path ) LIMIT 50"))) (define (delete-narinfo-signature-data conn nar-ids) (exec-query conn (string-append " DELETE FROM narinfo_signature_data WHERE id IN ( SELECT narinfo_signature_data_id FROM narinfo_signatures WHERE nar_id IN (" (string-join nar-ids ",") ") )"))) (define (delete-nars conn nar-ids) (exec-query conn (string-append " DELETE FROM nars WHERE id IN (" (string-join nar-ids ",") " )"))) (with-postgresql-connection "data-deletion" (lambda (conn) (with-advisory-session-lock conn 'delete-nars-for-unknown-store-paths (lambda () (newline) (let loop ((nar-ids (get-nar-ids-batch conn))) (unless (null? nar-ids) (delete-narinfo-signature-data conn nar-ids) (delete-nars conn nar-ids) (display ".") (force-output) (loop (get-nar-ids-batch conn)))))))))