aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christopher Baines <mail@cbaines.net> 2024-01-15 11:18:39 +0000
committer Christopher Baines <mail@cbaines.net> 2024-01-18 14:41:32 +0000
commit 49b4841c4e1f6745fd5a148cdeb3cf118164b70c (patch)
tree 9bac092871b791681e5542f6fdb2f65fd8b50894
parent 241d7e4889e4b3d8a042cd2348004ccc19d6fc1d (diff)
download data-service-49b4841c4e1f6745fd5a148cdeb3cf118164b70c.tar
data-service-49b4841c4e1f6745fd5a148cdeb3cf118164b70c.tar.gz
Use delete-duplicates/sort! in insert-missing-data-and-return-all-ids
As it's faster than delete-duplicates for large amounts of data.
-rw-r--r-- guix-data-service/model/utils.scm 48
1 files changed, 43 insertions, 5 deletions
diff --git a/guix-data-service/model/utils.scm b/guix-data-service/model/utils.scm
index f40174a..b46e2e4 100644
--- a/guix-data-service/model/utils.scm
+++ b/guix-data-service/model/utils.scm
@@ -178,6 +178,44 @@ WHERE table_name = $1"
(error
(simple-format #f "error: unknown type for value: ~A" v)))))
+ (define (delete-duplicates* data) ; Remove duplicate rows from DATA by calling delete-duplicates/sort! with the row-ordering predicate below.
+ (delete-duplicates/sort!
+ (list-copy data) ; hand over a copy, not DATA itself -- presumably because the "!" procedure may mutate its argument; confirm
+ (lambda (full-a full-b) ; ordering predicate over two rows: the first position where they differ decides the result
+ (let loop ((a full-a)
+ (b full-b))
+ (if (null? a) ; only A is tested for exhaustion; B is assumed to be at least as long (not checked)
+ #f ; no differing position found, so A is not ordered before B
+ (let ((a-val (match (car a) ; reduce the current element of A to a comparable value
+ ((_ . val) val) ; any pair: keep its cdr; the car takes no part in the comparison
+ ((? symbol? val) (symbol->string val)) ; symbol: compared by its string name
+ (val val))) ; anything else (including the empty list) is used as is
+ (b-val (match (car b) ; same reduction for the current element of B
+ ((_ . val) val)
+ ((? symbol? val) (symbol->string val))
+ (val val))))
+ (cond
+ ((null? a-val) ; empty-list values are handled before the typed comparison -- presumably they stand for SQL NULL; confirm
+ (if (null? b-val)
+ (loop (cdr a) (cdr b)) ; both empty: equal here, move to the next position
+ #t)) ; an empty-list value orders before any other value
+ ((null? b-val)
+ #f) ; A has a value where B has the empty list, so A is not ordered before B
+ (else
+ (match a-val ; dispatch on a-val's type only; b-val is passed to the same comparison without a type check; no fallback clause for other types
+ ((? string? v)
+ (if (string=? a-val b-val)
+ (loop (cdr a) (cdr b)) ; equal strings: move to the next position
+ (string<? a-val b-val)))
+ ((? number? v)
+ (if (= a-val b-val)
+ (loop (cdr a) (cdr b)) ; equal numbers: move to the next position
+ (< a-val b-val)))
+ ((? boolean? v)
+ (if (eq? a-val b-val)
+ (loop (cdr a) (cdr b)) ; same boolean: move to the next position
+ a-val))))))))))) ; booleans differ: the result is a-val itself, so #t orders before #f
+
(define schema-details ; holds the result of table-schema for conn and table-name -- presumably the table's column details; confirm against table-schema
(table-schema conn table-name))
@@ -312,9 +350,9 @@ WHERE table_name = $1"
(string-append "temp_" table-name))
(data
(if sets-of-data?
- (delete-duplicates (concatenate data))
+ (delete-duplicates* (concatenate data))
(if delete-duplicates?
- (delete-duplicates data)
+ (delete-duplicates* data)
data))))
;; Create a temporary table to store the data
(exec-query
@@ -363,7 +401,7 @@ WHERE table_name = $1"
#:vhash result))
vlist-null
(chunk (if sets-of-data?
- (delete-duplicates
+ (delete-duplicates*
(concatenate data))
data)
3000)))))
@@ -375,9 +413,9 @@ WHERE table_name = $1"
(normalise-values field-values)
existing-entries)))
(if sets-of-data?
- (delete-duplicates (concatenate data))
+ (delete-duplicates* (concatenate data))
(if delete-duplicates?
- (delete-duplicates data)
+ (delete-duplicates* data)
data))))
(new-entries
(if (null? missing-entries)