diff options
author | Christopher Baines <mail@cbaines.net> | 2020-02-04 00:08:49 +0000 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2020-02-04 00:08:49 +0000 |
commit | a3c5f53fcdf0852400788375ef7d352b71eaebe9 (patch) | |
tree | b80a142ba236eb53ec179afef8870c5d3933fe79 /guix-data-service | |
parent | a3d84b1aec667770d27016950f0c310b170f5ba3 (diff) | |
download | data-service-a3c5f53fcdf0852400788375ef7d352b71eaebe9.tar data-service-a3c5f53fcdf0852400788375ef7d352b71eaebe9.tar.gz |
Support using a temp table in insert-missing-data-and-return-all-ids
This is more performant when there is a lot of data.
Diffstat (limited to 'guix-data-service')
-rw-r--r-- | guix-data-service/model/utils.scm | 90 |
1 files changed, 83 insertions, 7 deletions
diff --git a/guix-data-service/model/utils.scm b/guix-data-service/model/utils.scm index fc6d77c..1dc62ee 100644 --- a/guix-data-service/model/utils.scm +++ b/guix-data-service/model/utils.scm @@ -120,7 +120,8 @@ data #:key sets-of-data? - delete-duplicates?) + delete-duplicates? + use-temporary-table?) (define field-strings (map symbol->string fields)) @@ -177,7 +178,28 @@ field-strings) " AND "))) - (define (insert-sql missing-data) + (define (temp-table-select-query temp-table-name) + (string-append + "SELECT " table-name ".id, " + (string-join (map (lambda (field) + (string-append table-name "." field)) + field-strings) + ", ") + " FROM " table-name + " INNER JOIN " temp-table-name + " ON " + (string-join + (map (lambda (field) + (string-append + "(" table-name "." field " = " temp-table-name "." field + " OR (" table-name "." field " IS NULL AND" + " " temp-table-name "." field " IS NULL))")) + field-strings) + " AND "))) + + (define* (insert-sql missing-data + #:key + (table-name table-name)) (string-append "INSERT INTO " table-name " (" (string-join field-strings ", ") @@ -226,12 +248,66 @@ (error (simple-format #f "normalise-values: error: ~A\n" unknown)))) data)) + (define (log-time action f) + (simple-format #t "debug: Starting ~A\n" action) + (let* ((start-time (current-time)) + (result (f)) + (time-taken (- (current-time) start-time))) + (simple-format #t "debug: Finished ~A, took ~A seconds\n" + action time-taken) + result)) + (let* ((existing-entries - (exec-query->vhash conn - select-query - cdr - (lambda (result) - (string->number (first result))))) + (if use-temporary-table? + (let ((temp-table-name + (string-append "temp_" table-name)) + (data + (if sets-of-data? + (delete-duplicates (concatenate data)) + (if delete-duplicates? + (delete-duplicates data) + data)))) + ;; Create a temporary table to store the data + (exec-query + conn + (string-append "CREATE TEMPORARY TABLE " + temp-table-name + " (LIKE " + table-name + " INCLUDING ALL)")) + (exec-query + conn + (string-append + "ANALYZE " temp-table-name)) + + ;; Populate the temporary table + (log-time + (string-append "populating " temp-table-name) + (lambda () + (exec-query conn + (insert-sql data + #:table-name temp-table-name)))) + ;; Use the temporary table to find the existing values + (let ((result + (log-time + (string-append "querying the " temp-table-name) + (lambda () + (exec-query->vhash + conn + (temp-table-select-query temp-table-name) + cdr + (lambda (result) + (string->number (first result)))))))) + + (exec-query conn (string-append "DROP TABLE " temp-table-name)) + result)) + + ;; If not using a temporary table, just do a single SELECT query + (exec-query->vhash conn + select-query + cdr + (lambda (result) + (string->number (first result)))))) (missing-entries (filter (lambda (field-values) (not (vhash-assoc |