;;; Guix Data Service -- Information about Guix over time ;;; Copyright © 2019 Christopher Baines ;;; ;;; This program is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU Affero General Public License ;;; as published by the Free Software Foundation, either version 3 of ;;; the License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public ;;; License along with this program. If not, see ;;; . (define-module (guix-data-service model utils) #:use-module (srfi srfi-1) #:use-module (ice-9 match) #:use-module (ice-9 vlist) #:use-module (ice-9 receive) #:use-module (squee) #:use-module (guix-data-service database) #:export (NULL quote-string value->quoted-string-or-null non-empty-string-or-false exec-query->vhash two-lists->vhash parse-postgresql-array-string deduplicate-strings group-list-by-first-n-fields group-to-alist insert-missing-data-and-return-all-ids)) (define NULL '()) (define (quote-string s) (string-append "$STR$" s "$STR$")) (define (value->quoted-string-or-null value) (if (string? value) (string-append "$STR$" value "$STR$") "NULL")) (define (non-empty-string-or-false s) (if (string? s) (if (string-null? s) #f s) #f)) (define (exec-query->vhash conn query field-function value-function) (fold (lambda (row result) (vhash-cons (field-function row) (value-function row) result)) vlist-null (exec-query-with-null-handling conn query))) (define (two-lists->vhash l1 l2) (fold (lambda (key value result) (vhash-cons key value result)) vlist-null l1 l2)) (define (parse-postgresql-array-string s) (if (string=? s "{}") '() (string-split (string-drop-right (string-drop s 1) 1) #\,))) (define (deduplicate-strings strings) (pair-fold (lambda (pair result) (if (null? (cdr pair)) (cons (first pair) result) (if (string=? (first pair) (second pair)) result (cons (first pair) result)))) '() (sort strings stringstring fields)) (define value->sql (match-lambda ((? string? s) (string-append "$STR$" s "$STR$")) ((? symbol? s) (string-append "$STR$" (symbol->string s) "$STR$")) ((? number? n) (number->string n)) ((? boolean? b) (if b "TRUE" "FALSE")) ((? null?) "NULL") ((cast . value) (string-append (value->sql value) "::" cast)) (v (error (simple-format #f "error: unknown type for value: ~A" v))))) (define select-query (string-append "SELECT id, " (string-join (map (lambda (field) (string-append table-name "." field)) field-strings) ", ") " FROM " table-name " JOIN (VALUES " (string-join (map (lambda (field-values) (string-append "(" (string-join (map value->sql field-values) ",") ")")) (if sets-of-data? (delete-duplicates (concatenate data)) data)) ", ") ") AS vals (" (string-join field-strings ", ") ") " "ON " (string-join (map (lambda (field) (string-append "(" table-name "." field " = vals." field " OR (" table-name "." field " IS NULL AND" " vals." field " IS NULL))")) field-strings) " AND "))) (define (temp-table-select-query temp-table-name) (string-append "SELECT " table-name ".id, " (string-join (map (lambda (field) (string-append table-name "." field)) field-strings) ", ") " FROM " table-name " INNER JOIN " temp-table-name " ON " (string-join (map (lambda (field) (string-append "(" table-name "." field " = " temp-table-name "." field " OR (" table-name "." field " IS NULL AND" " " temp-table-name "." field " IS NULL))")) field-strings) " AND "))) (define* (insert-sql missing-data #:key (table-name table-name)) (string-append "INSERT INTO " table-name " (" (string-join field-strings ", ") ") VALUES " (string-join (map (lambda (field-values) (string-append "(" (string-join (map (lambda (value) (value->sql value)) field-values) ", ") ")")) missing-data) ", ") " RETURNING id")) (define (format-json json) ;; PostgreSQL formats JSON strings differently to guile-json, so use ;; PostgreSQL to do the formatting (caar (exec-query conn (string-append "SELECT $STR$" json "$STR$::jsonb")))) (define (normalise-values data) (map (match-lambda ((? boolean? b) (if b "t" "f")) ((? number? n) (number->string n)) ((? symbol? s) (symbol->string s)) ((? string? s) s) ((? null? n) ;; exec-query-with-null-handling specifies NULL values as '() n) ((cast . value) (if (string=? cast "jsonb") (format-json value) value)) (unknown (error (simple-format #f "normalise-values: error: ~A\n" unknown)))) data)) (define (log-time action f) (simple-format #t "debug: Starting ~A\n" action) (let* ((start-time (current-time)) (result (f)) (time-taken (- (current-time) start-time))) (simple-format #t "debug: Finished ~A, took ~A seconds\n" action time-taken) result)) (let* ((existing-entries (if use-temporary-table? (let ((temp-table-name (string-append "temp_" table-name)) (data (if sets-of-data? (delete-duplicates (concatenate data)) (if delete-duplicates? (delete-duplicates data) data)))) ;; Create a temporary table to store the data (exec-query conn (string-append "CREATE TEMPORARY TABLE " temp-table-name " (LIKE " table-name " INCLUDING ALL)")) (exec-query conn (string-append "ANALYZE " temp-table-name)) ;; Populate the temporary table (log-time (string-append "populating " temp-table-name) (lambda () (exec-query conn (insert-sql data #:table-name temp-table-name)))) ;; Use the temporary table to find the existing values (let ((result (log-time (string-append "querying the " temp-table-name) (lambda () (exec-query->vhash conn (temp-table-select-query temp-table-name) cdr (lambda (result) (string->number (first result)))))))) (exec-query conn (string-append "DROP TABLE " temp-table-name)) result)) ;; If not using a temporary table, just do a single SELECT query (exec-query->vhash conn select-query cdr (lambda (result) (string->number (first result)))))) (missing-entries (filter (lambda (field-values) (not (vhash-assoc ;; Normalise at this point, so that the proper value ;; to insert is carried forward (normalise-values field-values) existing-entries))) (if sets-of-data? (delete-duplicates (concatenate data)) (if delete-duplicates? (delete-duplicates data) data)))) (new-entries (if (null? missing-entries) '() (map (lambda (result) (string->number (first result))) (exec-query conn (insert-sql missing-entries))))) (new-entries-lookup-vhash (two-lists->vhash missing-entries new-entries))) (if sets-of-data? (map (lambda (field-value-lists) ;; Normalise the result at this point, ensuring that the id's ;; in the set are sorted (sort (map (lambda (field-values) (cdr (or (vhash-assoc (normalise-values field-values) existing-entries) (vhash-assoc field-values new-entries-lookup-vhash) (error "missing entry" field-values)))) field-value-lists) <)) data) (map (lambda (field-values) (cdr (or (vhash-assoc (normalise-values field-values) existing-entries) (vhash-assoc field-values new-entries-lookup-vhash) (error "missing entry" field-values)))) data))))