;;; Guix Data Service -- Information about Guix over time ;;; Copyright © 2019 Christopher Baines ;;; ;;; This program is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU Affero General Public License ;;; as published by the Free Software Foundation, either version 3 of ;;; the License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public ;;; License along with this program. If not, see ;;; . (define-module (guix-data-service model derivation) #:use-module (srfi srfi-1) #:use-module (srfi srfi-43) #:use-module (ice-9 vlist) #:use-module (ice-9 match) #:use-module (ice-9 format) #:use-module (ice-9 binary-ports) #:use-module (rnrs bytevectors) #:use-module (gcrypt hash) #:use-module (squee) #:use-module (json) #:use-module (guix base16) #:use-module (guix base32) #:use-module (guix serialization) #:use-module (lzlib) #:use-module (guix inferior) #:use-module (guix memoization) #:use-module (guix derivations) #:use-module (guix-data-service utils) #:use-module (guix-data-service database) #:use-module (guix-data-service model utils) #:use-module (guix-data-service model guix-revision) #:use-module (guix-data-service model guix-revision-package-derivation) #:use-module (guix-data-service model system) #:export (valid-targets count-derivations select-derivation-by-file-name select-derivation-by-file-name-hash select-derivation-outputs-by-derivation-id select-derivation-outputs-by-derivation-file-name select-derivation-sources-by-derivation-id select-derivation-references-by-derivation-id select-derivation-source-file-by-store-path select-derivation-source-file-nar-details-by-file-name select-derivation-source-file-nar-data-by-file-name select-derivation-source-file-data-by-file-name-hash select-derivation-by-output-filename select-derivations-using-output select-package-derivations-in-revision search-package-derivations-in-revision select-fixed-output-package-derivations-in-revision select-derivation-outputs-in-revision fix-derivation-output-details-hash-encoding derivation-output-details->derivation-output-details-ids derivation-output-details-ids->derivation-output-details-set-id select-derivations-by-revision-name-and-version select-derivation-inputs-by-derivation-id select-serialized-derivation-by-file-name select-existing-derivations select-derivations-by-id select-derivations-and-build-status derivation-file-names->derivation-ids update-derivation-inputs-statistics vacuum-derivation-inputs-table update-derivation-outputs-statistics vacuum-derivation-outputs-table)) (define (valid-targets conn) '("arm-linux-gnueabihf" "aarch64-linux-gnu" "mips64el-linux-gnu" "powerpc-linux-gnu" "powerpc64le-linux-gnu" "riscv64-linux-gnu" "i586-pc-gnu" "i686-w64-mingw32" "x86_64-w64-mingw32")) (define (count-derivations conn) (first (exec-query conn "SELECT COUNT(*) FROM derivations"))) (define (select-existing-derivations file-names) (string-append "SELECT id, file_name " "FROM derivations " "WHERE file_name IN " "(" (string-join (map (lambda (file-name) (simple-format #f "'~A'" file-name)) file-names) ",") ");")) (define (select-from-derivation-output-details paths) (string-append "SELECT id, path FROM derivation_output_details " "WHERE path IN (" (string-join (map quote-string paths) ",") ")")) (define (select-derivation-by-output-filename conn filename) (define query (string-append "SELECT derivations.file_name, derivation_outputs.id " "FROM derivation_output_details " "INNER JOIN derivation_outputs" " ON derivation_output_details.id = derivation_outputs.derivation_output_details_id " "INNER JOIN derivations" " ON derivation_outputs.derivation_id = derivations.id " "WHERE derivation_output_details.path = $1")) (exec-query conn query (list filename))) (define (select-derivations-using-output conn output-id) (define query (string-append "SELECT derivations.file_name " "FROM derivations " "INNER JOIN derivation_inputs" " ON derivation_inputs.derivation_id = derivations.id " "WHERE derivation_output_id = $1 " "ORDER BY derivations.file_name " "LIMIT 100 ")) (exec-query conn query (list output-id))) (define (select-derivations-by-revision-name-and-version conn revision-commit-hash name version) (define query " SELECT systems.system, package_derivations.target, derivations.file_name, JSON_AGG( json_build_object( 'build_server_id', builds.build_server_id, 'build_server_build_id', builds.build_server_build_id, 'status', latest_build_status.status, 'timestamp', latest_build_status.timestamp, 'build_for_equivalent_derivation', builds.derivation_file_name != derivations.file_name ) ORDER BY latest_build_status.timestamp ) FROM derivations INNER JOIN systems ON derivations.system_id = systems.id INNER JOIN package_derivations ON derivations.id = package_derivations.derivation_id INNER JOIN packages ON package_derivations.package_id = packages.id INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id INNER JOIN guix_revisions ON guix_revision_package_derivations.revision_id = guix_revisions.id INNER JOIN derivations_by_output_details_set ON derivations.id = derivations_by_output_details_set.derivation_id LEFT OUTER JOIN builds ON derivations_by_output_details_set.derivation_output_details_set_id = builds.derivation_output_details_set_id LEFT OUTER JOIN build_servers ON builds.build_server_id = build_servers.id LEFT OUTER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE guix_revisions.commit = $1 AND packages.name = $2 AND packages.version = $3 GROUP BY systems.system, package_derivations.target, derivations.file_name ORDER BY systems.system DESC, NULLIF(package_derivations.target, '') DESC NULLS FIRST, derivations.file_name") (map (match-lambda ((system target file-name builds-json) (list system target file-name (filter (lambda (build) (string? (assoc-ref build "status"))) (vector->list (json-string->scm builds-json))))) ((file_name system target) (list file_name system target))) (exec-query conn query (list revision-commit-hash name version)))) (define* (select-package-derivations-in-revision conn commit-hash #:key systems targets minimum-builds maximum-builds build-from-build-servers no-build-from-build-servers limit-results after-name (include-builds? #t) ;; build-status: failing, ;; working, unknown build-status) (define criteria (string-join `(,@(filter-map (lambda (field values) (if values (string-append field " IN (" (string-join (map (lambda (value) (simple-format #f "'~A'" value)) values) ",") ")") #f)) '("systems.system" "target") (list systems targets)) ,@(if minimum-builds (list (string-append " ( SELECT COUNT(*) FROM builds WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id ) >= " (number->string minimum-builds))) '()) ,@(if maximum-builds (list (string-append " ( SELECT COUNT(*) FROM builds WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id ) <= " (number->string maximum-builds))) '()) ,@(map (lambda (build-server-id) (string-append " EXISTS( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status != 'canceled' AND builds.build_server_id = " (number->string build-server-id) " )")) (or build-from-build-servers '())) ,@(map (lambda (build-server-id) (string-append " NOT EXISTS( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status != 'canceled' AND builds.build_server_id = " (number->string build-server-id) " )")) (or no-build-from-build-servers '())) ,@(cond ((eq? build-status #f) '()) ((eq? build-status 'failing) '(" ( NOT EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'succeeded' ) AND EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'failed' ) )")) ((eq? build-status 'working) '(" EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'succeeded' )")) ((eq? build-status 'unknown) '(" ( NOT EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'succeeded' ) AND NOT EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'failed' ) )")) (else (error "unknown build-status")))) " AND ")) (define query (string-append " SELECT derivations.file_name, systems.system, package_derivations.target" (if include-builds? ", ( SELECT JSON_AGG( json_build_object( 'build_server_id', builds.build_server_id, 'build_server_build_id', builds.build_server_build_id, 'status', latest_build_status.status, 'timestamp', latest_build_status.timestamp, 'build_for_equivalent_derivation', builds.derivation_file_name != derivations.file_name ) ORDER BY latest_build_status.timestamp ) FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id ) AS builds" "") " FROM derivations INNER JOIN systems ON derivations.system_id = systems.id INNER JOIN derivations_by_output_details_set ON derivations.id = derivations_by_output_details_set.derivation_id INNER JOIN package_derivations ON derivations.id = package_derivations.derivation_id INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id INNER JOIN guix_revisions ON guix_revision_package_derivations.revision_id = guix_revisions.id INNER JOIN packages ON package_derivations.package_id = packages.id WHERE guix_revisions.commit = $1 " (if after-name " AND derivations.file_name > $2" "") (if (string-null? criteria) "" (string-append " AND " criteria)) " ORDER BY derivations.file_name " (if limit-results (string-append " LIMIT " (number->string limit-results)) ""))) (map (match-lambda ((file_name system target builds) (list file_name system target (if (or (and (string? builds) (string-null? builds)) (eq? #f builds)) #() (json-string->scm builds)))) ((file_name system target) (list file_name system target))) (exec-query conn query `(,commit-hash ,@(if after-name (list after-name) '()))))) (define* (search-package-derivations-in-revision conn commit-hash search-query #:key systems targets minimum-builds maximum-builds build-from-build-servers no-build-from-build-servers limit-results after-name (include-builds? #t) ;; build-status: failing, ;; working, unknown build-status) (define criteria (string-join `(,@(filter-map (lambda (field values) (if values (string-append field " IN (" (string-join (map (lambda (value) (simple-format #f "'~A'" value)) values) ",") ")") #f)) '("systems.system" "target") (list systems targets)) ,@(if minimum-builds (list (string-append " ( SELECT COUNT(*) FROM builds WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id ) >= " (number->string minimum-builds))) '()) ,@(if maximum-builds (list (string-append " ( SELECT COUNT(*) FROM builds WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id ) <= " (number->string maximum-builds))) '()) ,@(map (lambda (build-server-id) (string-append " EXISTS( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status != 'canceled' AND builds.build_server_id = " (number->string build-server-id) " )")) (or build-from-build-servers '())) ,@(map (lambda (build-server-id) (string-append " NOT EXISTS( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status != 'canceled' AND builds.build_server_id = " (number->string build-server-id) " )")) (or no-build-from-build-servers '())) ,@(cond ((eq? build-status #f) '()) ((eq? build-status 'failing) '(" ( NOT EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'succeeded' ) AND EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'failed' ) )")) ((eq? build-status 'working) '(" EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'succeeded' )")) ((eq? build-status 'unknown) '(" ( NOT EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'succeeded' ) AND NOT EXISTS ( SELECT 1 FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id AND latest_build_status.status = 'failed' ) )")) (else (error "unknown build-status")))) " AND ")) (define query (string-append " SELECT derivations.file_name, systems.system, package_derivations.target" (if include-builds? ", ( SELECT JSON_AGG( json_build_object( 'build_server_id', builds.build_server_id, 'build_server_build_id', builds.build_server_build_id, 'status', latest_build_status.status, 'timestamp', latest_build_status.timestamp, 'build_for_equivalent_derivation', builds.derivation_file_name != derivations.file_name ) ORDER BY latest_build_status.timestamp ) FROM builds INNER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE builds.derivation_output_details_set_id = derivations_by_output_details_set.derivation_output_details_set_id ) AS builds" "") " FROM derivations INNER JOIN systems ON derivations.system_id = systems.id INNER JOIN derivations_by_output_details_set ON derivations.id = derivations_by_output_details_set.derivation_id INNER JOIN package_derivations ON derivations.id = package_derivations.derivation_id INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id INNER JOIN guix_revisions ON guix_revision_package_derivations.revision_id = guix_revisions.id INNER JOIN packages ON package_derivations.package_id = packages.id WHERE guix_revisions.commit = $1 AND derivations.file_name LIKE $2 " (if after-name " AND derivations.file_name > $3" "") (if (string-null? criteria) "" (string-append " AND " criteria)) " ORDER BY derivations.file_name " (if limit-results (string-append " LIMIT " (number->string limit-results)) ""))) (map (match-lambda ((file_name system target) (list file_name system target)) ((file_name system target builds) (list file_name system target (if (or (and (string? builds) (string-null? builds)) (eq? #f builds)) #() (json-string->scm builds))))) (exec-query conn query `(,commit-hash ,(string-append "%" search-query "%") ,@(if after-name (list after-name) '()))))) (define* (select-fixed-output-package-derivations-in-revision conn commit system target #:key after-derivation-file-name (limit-results 50) ;; latest-build-status: failing, ;; working, unknown latest-build-status) (define query (string-append (get-sql-to-select-package-and-related-derivations-for-revision conn (commit->revision-id conn commit) #:system-id (system->system-id conn system) #:target target) " SELECT DISTINCT ON (derivations.file_name) derivations.file_name, ( CASE WHEN latest_build_status.status IS NULL THEN NULL ELSE json_build_object( 'build_server_id', builds.build_server_id, 'build_server_build_id', builds.build_server_build_id, 'status', latest_build_status.status, 'timestamp', latest_build_status.timestamp ) END ) AS latest_build FROM all_derivations INNER JOIN derivations ON all_derivations.derivation_id = derivations.id INNER JOIN derivation_outputs ON all_derivations.derivation_id = derivation_outputs.derivation_id INNER JOIN derivation_output_details ON derivation_outputs.derivation_output_details_id = derivation_output_details.id LEFT JOIN builds -- This is intentional, as we want to build/check this exact derivation, not -- any others that happen to produce the same output ON derivations.file_name = builds.derivation_file_name LEFT JOIN latest_build_status ON builds.id = latest_build_status.build_id -- These are the two interesting states, so ignore builds in any other states AND latest_build_status.status IN ('succeeded', 'failed') WHERE derivation_output_details.hash IS NOT NULL" (if after-derivation-file-name " AND derivations.file_name > $2" "") (if latest-build-status (simple-format #f " AND latest_build_status.status = $~A" (if after-derivation-file-name 3 2)) "") " ORDER BY derivations.file_name, latest_build_status.timestamp DESC LIMIT $1")) (map (match-lambda ((derivation_file_name latest_build) `((derivation_file_name . ,derivation_file_name) (latest_build . ,(if (null? latest_build) 'null (map (match-lambda ((key . value) (cons (string->symbol key) value))) (json-string->scm latest_build))))))) (exec-query-with-null-handling conn query `(,(number->string (or limit-results 999999)) ; TODO ,@(if after-derivation-file-name (list after-derivation-file-name) '()) ,@(if latest-build-status (list latest-build-status) '()))))) (define* (select-derivation-outputs-in-revision conn commit-hash #:key search-query output-consistency nars-from-build-servers no-nars-from-build-servers system target include-nars? limit-results after-path) (define query (string-append " SELECT packages.name, packages.version, derivation_output_details.path, derivation_output_details.hash_algorithm, derivation_output_details.hash, derivation_output_details.recursive" (if include-nars? ", ( SELECT JSON_AGG( json_build_object( 'build_server_id', narinfo_fetch_records.build_server_id, 'hash_algorithm', nars.hash_algorithm, 'hash', nars.hash, 'size', nars.size ) ) FROM nars INNER JOIN narinfo_signatures ON nars.id = narinfo_signatures.nar_id INNER JOIN narinfo_signature_data ON narinfo_signature_data.id = narinfo_signatures.narinfo_signature_data_id INNER JOIN narinfo_fetch_records ON narinfo_signature_data.id = narinfo_fetch_records.narinfo_signature_data_id WHERE nars.store_path = derivation_output_details.path ) AS nars" "") " FROM derivations INNER JOIN derivation_outputs ON derivations.id = derivation_outputs.derivation_id INNER JOIN derivation_output_details ON derivation_outputs.derivation_output_details_id = derivation_output_details.id INNER JOIN package_derivations ON derivations.id = package_derivations.derivation_id INNER JOIN systems ON package_derivations.system_id = systems.id INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id INNER JOIN guix_revisions ON guix_revision_package_derivations.revision_id = guix_revisions.id INNER JOIN packages ON package_derivations.package_id = packages.id WHERE guix_revisions.commit = $1 " (let ((criteria `(,@(if after-path '(" AND derivation_output_details.path > ") '()) ,@(if system '(" AND systems.system = ") '()) ,@(if target '(" AND package_derivations.target = ") '()) ,@(if search-query '(" AND derivation_output_details.path LIKE ") '())))) (string-concatenate (map (lambda (query count) (simple-format #f "~A$~A" query count)) criteria (iota (length criteria) 2)))) (if (list? nars-from-build-servers) (string-append " AND ARRAY[" (string-join (map number->string nars-from-build-servers) ", ") "]::integer[] <@ COALESCE(( -- contained by SELECT ARRAY_AGG(narinfo_fetch_records.build_server_id) FROM nars INNER JOIN narinfo_signatures ON nars.id = narinfo_signatures.nar_id INNER JOIN narinfo_signature_data ON narinfo_signature_data.id = narinfo_signatures.narinfo_signature_data_id INNER JOIN narinfo_fetch_records ON narinfo_signature_data.id = narinfo_fetch_records.narinfo_signature_data_id WHERE nars.store_path = derivation_output_details.path ), ARRAY[]::integer[])") "") (if (list? no-nars-from-build-servers) (string-append " AND NOT ARRAY[" (string-join (map number->string no-nars-from-build-servers) ", ") "]::integer[] && COALESCE(( SELECT ARRAY_AGG(narinfo_fetch_records.build_server_id) FROM nars INNER JOIN narinfo_signatures ON nars.id = narinfo_signatures.nar_id INNER JOIN narinfo_signature_data ON narinfo_signature_data.id = narinfo_signatures.narinfo_signature_data_id INNER JOIN narinfo_fetch_records ON narinfo_signature_data.id = narinfo_fetch_records.narinfo_signature_data_id WHERE nars.store_path = derivation_output_details.path ), ARRAY[]::integer[])") "") (cond ((string=? output-consistency "any") "") ((string=? output-consistency "fixed-output") " AND derivation_output_details.hash IS NOT NULL") (else (string-append " AND derivation_output_details.hash IS NULL AND ( SELECT " (cond ((string=? output-consistency "unknown") "COUNT(DISTINCT narinfo_fetch_records.build_server_id) <= 1") ((string=? output-consistency "matching") " CASE WHEN (COUNT(DISTINCT narinfo_fetch_records.build_server_id) <= 1) THEN NULL ELSE (COUNT(DISTINCT nars.hash) = 1) END") ((string=? output-consistency "not-matching") " CASE WHEN (COUNT(DISTINCT narinfo_fetch_records.build_server_id) <= 1) THEN NULL ELSE (COUNT(DISTINCT nars.hash) > 1) END") (else (error "unknown reproducibility status"))) " FROM nars INNER JOIN narinfo_signatures ON nars.id = narinfo_signatures.nar_id INNER JOIN narinfo_signature_data ON narinfo_signature_data.id = narinfo_signatures.narinfo_signature_data_id INNER JOIN narinfo_fetch_records ON narinfo_signature_data.id = narinfo_fetch_records.narinfo_signature_data_id WHERE nars.store_path = derivation_output_details.path )"))) " ORDER BY derivation_output_details.path " (if limit-results (string-append " LIMIT " (number->string limit-results)) ""))) (map (match-lambda ((package_name package_version path hash_algorithm hash recursive nars_json) (list package_name package_version path hash hash_algorithm (string=? recursive "t") (if (null? nars_json) #() (json-string->scm nars_json)))) ((package_name package_version path hash_algorithm hash recursive) (list package_name package_version path hash hash_algorithm (string=? recursive "t")))) (exec-query-with-null-handling conn query `(,commit-hash ,@(if after-path (list after-path) '()) ,@(if system (list system) '()) ,@(if target (list target) '()) ,@(if search-query (list (string-append "%" search-query "%")) '()))))) (define (fix-derivation-output-details-hash-encoding conn) (define (find-old-derivations-and-hashes conn) (exec-query conn " SELECT id, hash FROM derivation_output_details WHERE hash_algorithm = 'sha256' AND char_length(hash) = 52 LIMIT 100")) (define (fix-batch data) (for-each (match-lambda ((id base32-hash) (exec-query conn " UPDATE derivation_output_details SET hash = $2 WHERE id = $1" (list id (bytevector->base16-string (nix-base32-string->bytevector base32-hash)))))) data)) (unless (null? (find-old-derivations-and-hashes conn)) (with-postgresql-transaction conn (lambda (conn) (exec-query conn " LOCK TABLE ONLY derivation_output_details IN SHARE ROW EXCLUSIVE MODE") (let loop ((data (find-old-derivations-and-hashes conn))) (unless (null? data) (fix-batch data) (simple-format #t "updated ~A old hashes\n" (length data)) ;; Recurse in case there are more to fix (loop (find-old-derivations-and-hashes conn)))))))) (define (derivation-output-details->derivation-output-details-ids conn derivation-output-details) (insert-missing-data-and-return-all-ids conn "derivation_output_details" '(path hash_algorithm hash recursive) (map (lambda (details) (list (assq-ref details 'path) (or (non-empty-string-or-false (assq-ref details 'hash_algorithm)) NULL) (or (non-empty-string-or-false (assq-ref details 'hash)) NULL) (assq-ref details 'recursive))) derivation-output-details))) (define (derivation-output-details-ids->derivation-output-details-set-id conn derivation-output-details-ids) (define sorted-derivation-output-details-ids (sort derivation-output-details-ids <)) (define (select-derivation-output-details-sets-id) (match (exec-query conn (string-append " SELECT id FROM derivation_output_details_sets WHERE derivation_output_details_ids = ARRAY[" (string-join (map number->string sorted-derivation-output-details-ids) ",") "]")) (((id)) (string->number id)) (_ #f))) (define (insert-into-derivation-output-details-sets) (match (exec-query conn (string-append " INSERT INTO derivation_output_details_sets (derivation_output_details_ids) VALUES (ARRAY[" (string-join (map number->string sorted-derivation-output-details-ids) ",") "]) RETURNING id")) (((id)) (string->number id)))) (or (select-derivation-output-details-sets-id) (insert-into-derivation-output-details-sets))) (define (insert-derivation-outputs conn derivation-id names-and-derivation-outputs) (define (insert-into-derivation-outputs output-names derivation-output-details-ids) (string-append "INSERT INTO derivation_outputs " "(derivation_id, name, derivation_output_details_id) VALUES " (string-join (map (lambda (output-name derivation-output-details-id) (simple-format #f "(~A, '~A', ~A)" derivation-id output-name derivation-output-details-id)) output-names derivation-output-details-ids) ",") ";")) (define (insert-into-derivations-by-output-details-set derivation_output_details_set_id) (exec-query conn " INSERT INTO derivations_by_output_details_set (derivation_id, derivation_output_details_set_id) VALUES ($1, $2)" (list (number->string derivation-id) (number->string derivation_output_details_set_id)))) (let* ((derivation-outputs (map cdr names-and-derivation-outputs)) (derivation-output-paths (map derivation-output-path derivation-outputs)) (derivation-output-names (map car names-and-derivation-outputs)) (derivation-output-details-ids (derivation-output-details->derivation-output-details-ids conn (map (match-lambda (($ path hash-algo hash recursive?) `((path . ,path) (hash_algorithm . ,(or (and=> hash-algo symbol->string) NULL)) (hash . ,(or (and=> hash bytevector->base16-string) NULL)) (recursive . ,recursive?)))) derivation-outputs)))) (exec-query conn (insert-into-derivation-outputs derivation-output-names derivation-output-details-ids)) (insert-into-derivations-by-output-details-set (derivation-output-details-ids->derivation-output-details-set-id conn derivation-output-details-ids)) derivation-output-details-ids)) (define (select-derivation-by-file-name-hash conn file-name-hash) (define query (string-append "SELECT derivations.id, file_name, builder, args, to_json(env_vars), system " "FROM derivations " "INNER JOIN systems ON derivations.system_id = systems.id " "WHERE substring(file_name from 12 for 32) = $1")) (match (exec-query conn query (list file-name-hash)) (() #f) (((id file_name builder args env_vars system)) (list (string->number id) file_name builder (parse-postgresql-array-string args) (map (match-lambda (#(key value) `((key . ,key) (value . ,value)))) (vector->list (json-string->scm env_vars))) system)))) (define (select-derivation-by-file-name conn file-name) (define query (string-append "SELECT derivations.id, file_name, builder, args, to_json(env_vars), system " "FROM derivations " "INNER JOIN systems ON derivations.system_id = systems.id " "WHERE file_name = $1")) (match (exec-query conn query (list file-name)) (() #f) (((id file_name builder args env_vars system)) (list (string->number id) file_name builder (parse-postgresql-array-string args) (map (match-lambda (#(key value) `((key . ,key) (value . ,value)))) (vector->list (json-string->scm env_vars))) system)))) (define (select-derivation-outputs-by-derivation-id conn id) (define query (string-append " SELECT derivation_outputs.name, derivation_output_details.path, derivation_output_details.hash_algorithm, derivation_output_details.hash, derivation_output_details.recursive FROM derivation_outputs INNER JOIN derivation_output_details ON derivation_outputs.derivation_output_details_id = derivation_output_details.id WHERE derivation_id = $1 ORDER BY derivation_outputs.name")) (map (match-lambda ((name path hash_algorithm hash recursive) (list name path hash_algorithm hash (string=? recursive "t")))) (exec-query conn query (list (number->string id))))) (define (select-derivation-outputs-by-derivation-file-name conn file-name) (define query (string-append " SELECT derivation_outputs.name, derivation_output_details.path, derivation_output_details.hash_algorithm, derivation_output_details.hash, derivation_output_details.recursive FROM derivation_outputs INNER JOIN derivation_output_details ON derivation_outputs.derivation_output_details_id = derivation_output_details.id INNER JOIN derivations ON derivation_outputs.derivation_id = derivations.id WHERE derivations.file_name = $1 ORDER BY derivation_outputs.name")) (map (match-lambda ((name path hash_algorithm hash recursive) (list name path hash_algorithm hash (string=? recursive "t")))) (exec-query conn query (list file-name)))) (define (select-derivation-inputs-by-derivation-id conn id) (define query (string-append " SELECT derivations.file_name, JSON_AGG( json_build_object( 'output_name', derivation_outputs.name, 'store_filename', derivation_output_details.path ) ORDER BY derivation_outputs.name ) FROM derivation_inputs INNER JOIN derivation_outputs ON derivation_outputs.id = derivation_inputs.derivation_output_id INNER JOIN derivation_output_details ON derivation_outputs.derivation_output_details_id = derivation_output_details.id INNER JOIN derivations ON derivation_outputs.derivation_id = derivations.id WHERE derivation_inputs.derivation_id = $1 GROUP BY derivations.file_name ORDER BY derivations.file_name")) (map (match-lambda ((derivation-file-name outputs-json) (list derivation-file-name (json-string->scm outputs-json)))) (exec-query conn query (list (number->string id))))) (define (select-derivation-sources-by-derivation-id conn id) (define query (string-append " SELECT derivation_source_files.store_path FROM derivation_source_files INNER JOIN derivation_sources ON derivation_source_files.id = derivation_sources.derivation_source_file_id WHERE derivation_sources.derivation_id = $1 ORDER BY 1")) (map first (exec-query conn query (list (number->string id))))) (define (select-derivation-references-by-derivation-id conn id) (define query (string-append " SELECT * FROM ( SELECT derivation_source_files.store_path FROM derivation_source_files INNER JOIN derivation_sources ON derivation_source_files.id = derivation_sources.derivation_source_file_id WHERE derivation_sources.derivation_id = $1 UNION ALL SELECT derivations.file_name FROM derivation_inputs INNER JOIN derivation_outputs ON derivation_outputs.id = derivation_inputs.derivation_output_id INNER JOIN derivation_output_details ON derivation_outputs.derivation_output_details_id = derivation_output_details.id INNER JOIN derivations ON derivation_outputs.derivation_id = derivations.id WHERE derivation_inputs.derivation_id = $1 GROUP BY derivations.file_name ) AS data ORDER BY 1")) (map first (exec-query conn query (list (number->string id))))) (define (select-derivation-source-file-by-store-path conn store-path) (define query " SELECT id FROM derivation_source_files WHERE store_path = $1") (map car (exec-query conn query (list store-path)))) (define (select-derivation-source-file-data-by-file-name-hash conn hash) (match (exec-query conn " SELECT derivation_source_files.store_path, derivation_source_file_nars.compression, length(derivation_source_file_nars.data) AS compressed_size, derivation_source_file_nars.hash_algorithm, derivation_source_file_nars.hash, derivation_source_file_nars.uncompressed_size FROM derivation_source_file_nars INNER JOIN derivation_source_files ON derivation_source_file_nars.derivation_source_file_id = derivation_source_files.id WHERE substring(derivation_source_files.store_path from 12 for 32) = $1" (list hash)) (((store_path compression compressed_size hash_algorithm hash uncompressed_size)) (list store_path compression (string->number compressed_size) hash_algorithm hash (string->number uncompressed_size))) (() #f))) (define (select-derivation-source-file-nar-details-by-file-name conn file-name) (match (exec-query conn " SELECT compression, hash_algorithm, hash, uncompressed_size, length(data) AS compressed_size FROM derivation_source_file_nars INNER JOIN derivation_source_files ON derivation_source_file_nars.derivation_source_file_id = derivation_source_files.id WHERE derivation_source_files.store_path = $1" (list file-name)) (((compression hash_algorithm hash uncompressed_size compressed_size)) `((compression . ,compression) (hash_algorithm . ,hash_algorithm) (hash . ,hash) (uncompressed_size . ,(string->number uncompressed_size)) (compressed_size . ,(string->number compressed_size)))) (() #f))) (define (select-derivation-source-file-nar-data-by-file-name conn file-name) (match (exec-query conn " SELECT data FROM derivation_source_file_nars INNER JOIN derivation_source_files ON derivation_source_file_nars.derivation_source_file_id = derivation_source_files.id WHERE derivation_source_files.store_path = $1" (list file-name)) (((data)) (base16-string->bytevector ;; Drop \x from the start of the string (string-drop data 2))) (() #f))) (define (select-serialized-derivation-by-file-name conn derivation-file-name) (define (double-quote s) (string-append "\"" s "\"")) (define (round-brackets-list items) (string-append "(" (string-join items ",") ")")) (define (square-brackets-list items) (string-append "[" (string-join items ",") "]")) (let ((derivation (select-derivation-by-file-name conn derivation-file-name))) (if derivation (let ((derivation-inputs (select-derivation-inputs-by-derivation-id conn (first derivation))) (derivation-outputs (select-derivation-outputs-by-derivation-id conn (first derivation))) (derivation-sources (select-derivation-sources-by-derivation-id conn (first derivation)))) (string-append "Derive" (round-brackets-list `(;; Outputs ,(square-brackets-list (map (match-lambda ((output-name path hash-algorithm hash recursive?) (round-brackets-list (list (double-quote output-name) (double-quote path) (double-quote (string-append (if recursive? "r:" "") (or hash-algorithm ""))) (double-quote (or hash "")))))) derivation-outputs)) ;; Inputs ,(square-brackets-list (map (match-lambda ((file-name outputs) (round-brackets-list (list (double-quote file-name) (square-brackets-list (map (lambda (output) (double-quote (assoc-ref output "output_name"))) (vector->list outputs))))))) derivation-inputs)) ;; Sources ,(square-brackets-list (map double-quote derivation-sources)) ;; Other parts ,@(match derivation ((id file-name builder args env-vars system) (list (double-quote system) (double-quote builder) (square-brackets-list (map double-quote args)) (square-brackets-list (map (lambda (env-var) (round-brackets-list (list (with-output-to-string (lambda () (write (assq-ref env-var 'key)))) (with-output-to-string (lambda () (write (assq-ref env-var 'value))))))) env-vars))))))))) #f))) (define (insert-derivation-inputs conn derivation-ids derivations) (let ((query-parts (append-map! (lambda (derivation-id derivation) (append-map! (match-lambda (($ derivation-or-path sub-derivations) (let ((path (match derivation-or-path ((? derivation? d) ;; The first field changed to a derivation (from the file ;; name) in 5cf4b26d52bcea382d98fb4becce89be9ee37b55 (derivation-file-name d)) ((? string? s) s)))) (map (lambda (sub-derivation) (string-append "(" (number->string derivation-id) ", '" path "', '" sub-derivation "')")) sub-derivations)))) (derivation-inputs derivation))) derivation-ids derivations))) (chunk-for-each! (lambda (query-parts-chunk) (exec-query conn (string-append " INSERT INTO derivation_inputs (derivation_id, derivation_output_id) SELECT vals.derivation_id, derivation_outputs.id FROM (VALUES " (string-join query-parts-chunk ", ") ") AS vals (derivation_id, file_name, output_name) INNER JOIN derivations ON derivations.file_name = vals.file_name INNER JOIN derivation_outputs ON derivation_outputs.derivation_id = derivations.id AND vals.output_name = derivation_outputs.name"))) 1000 query-parts))) (define (select-from-derivation-source-files store-paths) (string-append "SELECT id, store_path FROM derivation_source_files " "WHERE store_path IN (" (string-join (map quote-string store-paths) ",") ");")) (define (insert-derivation-sources conn derivation-id sources) (define (insert-into-derivation-sources derivation-source-file-ids) (string-append "INSERT INTO derivation_sources " "(derivation_id, derivation_source_file_id) VALUES " (string-join (map (lambda (derivation-source-file-id) (simple-format #f "(~A, ~A)" derivation-id derivation-source-file-id)) derivation-source-file-ids) ",") ";")) (let ((sources-ids (insert-missing-data-and-return-all-ids conn "derivation_source_files" '(store_path) (map list sources)))) (exec-query conn (insert-into-derivation-sources sources-ids)) sources-ids)) (define (insert-derivation-source-file-nar conn id source-file) (define missing? (match (exec-query conn "SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" (list (number->string id))) (() #t) (_ #f))) (when missing? (let* ((nar-bytevector (call-with-values (lambda () (open-bytevector-output-port)) (lambda (port get-bytevector) (write-file source-file port) (get-bytevector)))) (data-string (bytevector->base16-string (call-with-values (lambda () (open-bytevector-output-port)) (lambda (port get-bytevector) (call-with-lzip-output-port port (lambda (port) (put-bytevector port nar-bytevector)) #:level 9) (get-bytevector))))) (hash (bytevector->nix-base32-string (sha256 nar-bytevector))) (uncompressed-size (bytevector-length nar-bytevector))) (exec-query conn " INSERT INTO derivation_source_file_nars ( derivation_source_file_id, compression, hash_algorithm, hash, uncompressed_size, data ) VALUES ($1, $2, $3, $4, $5, $6)" (list (number->string id) "lzip" "sha256" hash (number->string uncompressed-size) (string-append "\\x" data-string)))))) (define* (backfill-derivation-source-file-nars conn #:key (batch-size 10000) (loop? #t)) (define (missing-batch) (exec-query conn " SELECT id, store_path FROM derivation_source_files WHERE id NOT IN ( SELECT derivation_source_file_id FROM derivation_source_file_nars ) LIMIT $1" (list (number->string batch-size)))) (let loop ((batch (missing-batch))) (unless (null? batch) (for-each (match-lambda ((id source-file) (if (file-exists? source-file) (begin (insert-derivation-source-file-nar conn (string->number id) source-file) (simple-format #t "inserting ~A\n" source-file)) (simple-format #t "missing ~A\n" source-file)))) batch) (when loop? (loop (missing-batch)))))) (define (insert-missing-derivations conn derivation-ids-hash-table derivations) (define (ensure-input-derivations-exist input-derivation-file-names) (unless (null? input-derivation-file-names) (simple-format #t "debug: ensure-input-derivations-exist: processing ~A derivations\n" (length input-derivation-file-names)) (update-derivation-ids-hash-table! conn derivation-ids-hash-table (list->vector input-derivation-file-names)) (simple-format #t "debug: ensure-input-derivations-exist: checking for missing input derivations\n") (let ((missing-derivations-filenames (filter (lambda (derivation-file-name) (not (hash-ref derivation-ids-hash-table derivation-file-name))) input-derivation-file-names))) (unless (null? missing-derivations-filenames) (simple-format #f "debug: ensure-input-derivations-exist: inserting missing input derivations\n") ;; Ensure all the input derivations exist (insert-missing-derivations conn derivation-ids-hash-table (map read-derivation-from-file missing-derivations-filenames)))))) (define (insert-into-derivations dervs) (string-append "INSERT INTO derivations " "(file_name, builder, args, env_vars, system_id) VALUES " (string-join (map (match-lambda (($ outputs inputs sources system builder args env-vars file-name) (simple-format #f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')" file-name builder (string-join (map quote-string args) ",") (string-join (map (match-lambda ((key . value) (string-append "['" key '"', $$" value "$$ ]"))) env-vars) ",") (system->system-id conn system)))) dervs) ",") " RETURNING id" ";")) (simple-format #t "debug: insert-missing-derivations: inserting ~A derivations\n" (length derivations)) (let ((derivation-ids (append-map (lambda (chunk) (map (lambda (result) (string->number (car result))) (exec-query conn (insert-into-derivations chunk)))) (chunk derivations 500)))) (simple-format #t "debug: insert-missing-derivations: updating hash table\n") (for-each (lambda (derivation derivation-id) (hash-set! derivation-ids-hash-table (derivation-file-name derivation) derivation-id)) derivations derivation-ids) (simple-format #t "debug: insert-missing-derivations: inserting outputs\n") (for-each (lambda (derivation-id derivation) (insert-derivation-outputs conn derivation-id (derivation-outputs derivation))) derivation-ids derivations) (simple-format #t "debug: insert-missing-derivations: inserting sources\n") (for-each (lambda (derivation-id derivation) (let ((sources (derivation-sources derivation))) (unless (null? sources) (let ((sources-ids (insert-derivation-sources conn derivation-id sources))) (map (lambda (id source-file) (insert-derivation-source-file-nar conn id source-file)) sources-ids sources))))) derivation-ids derivations) (simple-format #t "debug: insert-missing-derivations: ensure-input-derivations-exist\n") (ensure-input-derivations-exist (deduplicate-strings (map derivation-input-path (append-map derivation-inputs derivations)))) (with-time-logging (simple-format #f "insert-missing-derivations: inserting inputs for ~A derivations" (length derivations)) (insert-derivation-inputs conn derivation-ids derivations)) derivation-ids)) (define (select-derivations-by-id conn ids) (define query (string-append "SELECT id, file_name " "FROM derivations " "WHERE id IN " "(" (string-join (map (lambda (id) (simple-format #f "'~A'" id)) ids) ",") ");")) (exec-query conn query)) (define* (select-derivations-and-build-status conn #:key file-names systems targets build-statuses) (define criteria (string-join (filter-map (lambda (field values) (if values (string-append field " IN (" (string-join (map (lambda (value) (simple-format #f "'~A'" value)) values) ",") ")") #f)) '("derivations.file_name" "systems.system" "target" "latest_build_status.status") (list (deduplicate-strings file-names) systems targets build-statuses)) " AND ")) (define query (string-append " SELECT derivations.file_name, systems.system, package_derivations.target, latest_build_status.status FROM derivations INNER JOIN systems ON derivations.system_id = systems.id INNER JOIN package_derivations ON derivations.id = package_derivations.derivation_id INNER JOIN derivations_by_output_details_set ON derivations.id = derivations_by_output_details_set.derivation_id LEFT OUTER JOIN builds ON derivations.derivation_output_details_set_id = builds.derivation_output_details_set_id LEFT OUTER JOIN latest_build_status ON builds.id = latest_build_status.build_id WHERE " criteria ";")) (exec-query conn query)) (define (deduplicate-derivations derivations) (define sorted-derivations (sort derivations (lambda (a b) (stringnumber id)))) (exec-query conn (select-existing-derivations chunk)))) (chunk! missing-file-names 1000))))) (define (derivation-file-names->derivation-ids conn derivation-file-names) (define derivations-count (vector-length derivation-file-names)) (define (insert-source-files-missing-nars derivation-ids) (define (derivation-ids->next-related-derivation-ids! ids seen-ids) (delete-duplicates/sort! (append-map! (lambda (ids-chunk) (let ((query (string-append " SELECT derivation_outputs.derivation_id FROM derivation_inputs INNER JOIN derivation_outputs ON derivation_outputs.id = derivation_inputs.derivation_output_id WHERE derivation_inputs.derivation_id IN (" (string-join (map number->string ids) ",") ")"))) (filter-map (lambda (row) (let ((number (string->number (car row)))) (if (hash-ref seen-ids number) #f (begin (hash-set! seen-ids number #t) number)))) (exec-query conn query)))) (chunk! ids 500)) < =)) (define (derivation-ids->missing-sources ids) (define query (string-append " SELECT derivation_sources.derivation_source_file_id, derivation_source_files.store_path FROM derivation_sources LEFT JOIN derivation_source_file_nars ON derivation_sources.derivation_source_file_id = derivation_source_file_nars.derivation_source_file_id INNER JOIN derivation_source_files ON derivation_sources.derivation_source_file_id = derivation_source_files.id WHERE derivation_sources.derivation_id IN (" (string-join (map number->string ids) ", ") ") AND derivation_source_file_nars.derivation_source_file_id IS NULL")) (map (lambda (row) (list (string->number (first row)) (second row))) (exec-query conn query))) (let ((seen-ids (make-hash-table))) (let loop ((next-related-derivation-ids (derivation-ids->next-related-derivation-ids! (list-copy derivation-ids) seen-ids))) (unless (null? next-related-derivation-ids) (let ((missing-sources (append-map! derivation-ids->missing-sources (chunk next-related-derivation-ids 10000)))) (unless (null? missing-sources) (with-time-logging (simple-format #f "inserting ~A missing source files" (length missing-sources)) (for-each (match-lambda ((derivation-source-file-id store-path) (insert-derivation-source-file-nar conn derivation-source-file-id store-path))) missing-sources)))) (loop (derivation-ids->next-related-derivation-ids! next-related-derivation-ids seen-ids)))))) (if (= 0 derivations-count) #() (let* ((derivation-ids-hash-table (make-hash-table ;; Account for more derivations in ;; the graph (* 2 derivations-count)))) (simple-format #t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n" derivations-count) (update-derivation-ids-hash-table! conn derivation-ids-hash-table derivation-file-names) (let ((missing-derivation-filenames (deduplicate-strings (vector-fold (lambda (_ result derivation-file-name) (if (not derivation-file-name) result (if (hash-ref derivation-ids-hash-table derivation-file-name) result (cons derivation-file-name result)))) '() derivation-file-names)))) (chunk-for-each! (lambda (missing-derivation-filenames-chunk) (let ((missing-derivations-chunk (with-time-logging (simple-format #f "reading ~A missing derivations" (length missing-derivation-filenames-chunk)) (map read-derivation-from-file ;; Do the filter again, since processing the last ;; chunk might have inserted some of the ;; derivations in this chunk (filter (lambda (derivation-file-name) (not (hash-ref derivation-ids-hash-table derivation-file-name))) missing-derivation-filenames-chunk))))) (unless (null? missing-derivations-chunk) (insert-missing-derivations conn derivation-ids-hash-table missing-derivations-chunk)))) 1000 missing-derivation-filenames) (let ((all-ids (vector-map (lambda (_ derivation-file-name) (if derivation-file-name (or (hash-ref derivation-ids-hash-table derivation-file-name) (error "missing derivation id")) #f)) derivation-file-names))) (with-time-logging "insert-source-files-missing-nars" (insert-source-files-missing-nars ;; TODO Avoid this conversion (vector-fold (lambda (_ result x) (if x (cons x result) result)) '() all-ids))) all-ids))))) (define (update-derivation-inputs-statistics conn) (let ((query " SELECT COUNT(DISTINCT derivation_id), COUNT(DISTINCT derivation_output_id) FROM derivation_inputs")) (match (exec-query conn query) (((derivation_id_count derivation_output_id_count)) (exec-query conn (simple-format #f " ALTER TABLE derivation_inputs ALTER COLUMN derivation_id SET (n_distinct = ~A)" derivation_id_count)) (exec-query conn (simple-format #f " ALTER TABLE derivation_inputs ALTER COLUMN derivation_output_id SET (n_distinct = ~A)" derivation_output_id_count)))))) (define (vacuum-derivation-inputs-table conn) (exec-query conn "VACUUM (VERBOSE, ANALYZE) derivation_inputs")) (define (update-derivation-outputs-statistics conn) (let ((query " SELECT COUNT(DISTINCT derivation_id), COUNT(*) FROM derivation_outputs")) (match (exec-query conn query) (((derivation_id_count all_count)) (unless (< (string->number all_count) 1) (exec-query conn (format #f " ALTER TABLE derivation_outputs ALTER COLUMN derivation_id SET (n_distinct = ~7f)" (* -1 (/ (string->number derivation_id_count) (string->number all_count)))))))))) (define (vacuum-derivation-outputs-table conn) (exec-query conn "VACUUM (VERBOSE, ANALYZE) derivation_outputs"))