diff options
author | Christopher Baines <mail@cbaines.net> | 2024-01-15 11:18:39 +0000 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2024-01-18 14:41:32 +0000 |
commit | 49b4841c4e1f6745fd5a148cdeb3cf118164b70c (patch) | |
tree | 9bac092871b791681e5542f6fdb2f65fd8b50894 | |
parent | 241d7e4889e4b3d8a042cd2348004ccc19d6fc1d (diff) | |
download | data-service-49b4841c4e1f6745fd5a148cdeb3cf118164b70c.tar data-service-49b4841c4e1f6745fd5a148cdeb3cf118164b70c.tar.gz |
Use delete-duplicates/sort! in insert-missing-data-and-return-all-ids
As it's faster than delete-duplicates for large amounts of data.
-rw-r--r-- | guix-data-service/model/utils.scm | 48 |
1 files changed, 43 insertions, 5 deletions
diff --git a/guix-data-service/model/utils.scm b/guix-data-service/model/utils.scm
index f40174a..b46e2e4 100644
--- a/guix-data-service/model/utils.scm
+++ b/guix-data-service/model/utils.scm
@@ -178,6 +178,44 @@ WHERE table_name = $1"
        (error
         (simple-format #f "error: unknown type for value: ~A" v)))))
 
+  (define (delete-duplicates* data)
+    (delete-duplicates/sort!
+     (list-copy data)
+     (lambda (full-a full-b)
+       (let loop ((a full-a)
+                  (b full-b))
+         (if (null? a)
+             #f
+             (let ((a-val (match (car a)
+                            ((_ . val) val)
+                            ((? symbol? val) (symbol->string val))
+                            (val val)))
+                   (b-val (match (car b)
+                            ((_ . val) val)
+                            ((? symbol? val) (symbol->string val))
+                            (val val))))
+               (cond
+                ((null? a-val)
+                 (if (null? b-val)
+                     (loop (cdr a) (cdr b))
+                     #t))
+                ((null? b-val)
+                 #f)
+                (else
+                 (match a-val
+                   ((? string? v)
+                    (if (string=? a-val b-val)
+                        (loop (cdr a) (cdr b))
+                        (string<? a-val b-val)))
+                   ((? number? v)
+                    (if (= a-val b-val)
+                        (loop (cdr a) (cdr b))
+                        (< a-val b-val)))
+                   ((? boolean? v)
+                    (if (eq? a-val b-val)
+                        (loop (cdr a) (cdr b))
+                        a-val)))))))))))
+
   (define schema-details
     (table-schema conn table-name))
 
@@ -312,9 +350,9 @@ WHERE table_name = $1"
                (string-append "temp_" table-name))
               (data
                (if sets-of-data?
-                   (delete-duplicates (concatenate data))
+                   (delete-duplicates* (concatenate data))
                    (if delete-duplicates?
-                       (delete-duplicates data)
+                       (delete-duplicates* data)
                        data))))
           ;; Create a temporary table to store the data
           (exec-query
@@ -363,7 +401,7 @@ WHERE table_name = $1"
                      #:vhash result))
                   vlist-null
                   (chunk (if sets-of-data?
-                             (delete-duplicates
+                             (delete-duplicates*
                               (concatenate data))
                              data)
                          3000)))))
@@ -375,9 +413,9 @@ WHERE table_name = $1"
                        (normalise-values field-values)
                        existing-entries)))
                    (if sets-of-data?
-                       (delete-duplicates (concatenate data))
+                       (delete-duplicates* (concatenate data))
                        (if delete-duplicates?
-                           (delete-duplicates data)
+                           (delete-duplicates* data)
                            data))))
                  (new-entries
                   (if (null? missing-entries)