aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2020-02-04 00:08:49 +0000
committerChristopher Baines <mail@cbaines.net>2020-02-04 00:08:49 +0000
commita3c5f53fcdf0852400788375ef7d352b71eaebe9 (patch)
treeb80a142ba236eb53ec179afef8870c5d3933fe79
parenta3d84b1aec667770d27016950f0c310b170f5ba3 (diff)
downloaddata-service-a3c5f53fcdf0852400788375ef7d352b71eaebe9.tar
data-service-a3c5f53fcdf0852400788375ef7d352b71eaebe9.tar.gz
Support using a temp table in insert-missing-data-and-return-all-ids
This is more performant when there is a lot of data.
-rw-r--r--guix-data-service/model/utils.scm90
1 files changed, 83 insertions, 7 deletions
diff --git a/guix-data-service/model/utils.scm b/guix-data-service/model/utils.scm
index fc6d77c..1dc62ee 100644
--- a/guix-data-service/model/utils.scm
+++ b/guix-data-service/model/utils.scm
@@ -120,7 +120,8 @@
data
#:key
sets-of-data?
- delete-duplicates?)
+ delete-duplicates?
+ use-temporary-table?)
(define field-strings
(map symbol->string fields))
@@ -177,7 +178,28 @@
field-strings)
" AND ")))
- (define (insert-sql missing-data)
+ (define (temp-table-select-query temp-table-name)
+ (string-append
+ "SELECT " table-name ".id, "
+ (string-join (map (lambda (field)
+ (string-append table-name "." field))
+ field-strings)
+ ", ")
+ " FROM " table-name
+ " INNER JOIN " temp-table-name
+ " ON "
+ (string-join
+ (map (lambda (field)
+ (string-append
+ "(" table-name "." field " = " temp-table-name "." field
+ " OR (" table-name "." field " IS NULL AND"
+ " " temp-table-name "." field " IS NULL))"))
+ field-strings)
+ " AND ")))
+
+ (define* (insert-sql missing-data
+ #:key
+ (table-name table-name))
(string-append
"INSERT INTO " table-name " ("
(string-join field-strings ", ")
@@ -226,12 +248,66 @@
(error (simple-format #f "normalise-values: error: ~A\n" unknown))))
data))
+ (define (log-time action f)
+ (simple-format #t "debug: Starting ~A\n" action)
+ (let* ((start-time (current-time))
+ (result (f))
+ (time-taken (- (current-time) start-time)))
+ (simple-format #t "debug: Finished ~A, took ~A seconds\n"
+ action time-taken)
+ result))
+
(let* ((existing-entries
- (exec-query->vhash conn
- select-query
- cdr
- (lambda (result)
- (string->number (first result)))))
+ (if use-temporary-table?
+ (let ((temp-table-name
+ (string-append "temp_" table-name))
+ (data
+ (if sets-of-data?
+ (delete-duplicates (concatenate data))
+ (if delete-duplicates?
+ (delete-duplicates data)
+ data))))
+ ;; Create a temporary table to store the data
+ (exec-query
+ conn
+ (string-append "CREATE TEMPORARY TABLE "
+ temp-table-name
+ " (LIKE "
+ table-name
+ " INCLUDING ALL)"))
+ (exec-query
+ conn
+ (string-append
+ "ANALYZE " temp-table-name))
+
+ ;; Populate the temporary table
+ (log-time
+ (string-append "populating " temp-table-name)
+ (lambda ()
+ (exec-query conn
+ (insert-sql data
+ #:table-name temp-table-name))))
+ ;; Use the temporary table to find the existing values
+ (let ((result
+ (log-time
+ (string-append "querying the " temp-table-name)
+ (lambda ()
+ (exec-query->vhash
+ conn
+ (temp-table-select-query temp-table-name)
+ cdr
+ (lambda (result)
+ (string->number (first result))))))))
+
+ (exec-query conn (string-append "DROP TABLE " temp-table-name))
+ result))
+
+ ;; If not using a temporary table, just do a single SELECT query
+ (exec-query->vhash conn
+ select-query
+ cdr
+ (lambda (result)
+ (string->number (first result))))))
(missing-entries
(filter (lambda (field-values)
(not (vhash-assoc