Re-write insert-derivation-inputs in a more memory-efficient manner

Previously it would compute a long list of strings, potentially more than 100,000 elements long, then split this list up and insert it in chunks. Only then could the memory be freed. This new approach builds the strings for the insertion query one batch at a time, then moves on to the next batch. This should mean that more memory can be freed and reused along the way.
author: Christopher Baines <mail@cbaines.net> 2022-01-12 18:18:15 +0000
committer: Christopher Baines <mail@cbaines.net> 2022-01-12 18:18:15 +0000
commit: 21cb33a859a25ac6ba82f32e014ea642e2e62afc (patch)
tree: e068f8f4a7f8aa47016f6ad2a73ff126f04f11ff /guix-data-service/model
parent: 6102553d947c8ae2f321091916986b091b94cee0 (diff)
download: data-service-21cb33a859a25ac6ba82f32e014ea642e2e62afc.tar
data-service-21cb33a859a25ac6ba82f32e014ea642e2e62afc.tar.gz
1 files changed, 38 insertions, 35 deletions
diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm
index 208bee6..9b88fc1 100644
--- a/guix-data-service/model/derivation.scm
+++ b/guix-data-service/model/derivation.scm
@@ -1329,48 +1329,51 @@ WHERE derivation_source_files.store_path = $1"
         #f)))
 
 (define (insert-derivation-inputs conn derivation-ids derivations)
-  (let ((data
-         (append-map
-          (lambda (derivation-id derivation)
-            (append-map
-             (match-lambda
-               (($ <derivation-input> derivation-or-path sub-derivations)
-                (let ((path
-                       (match derivation-or-path
-                         ((? derivation? d)
-                          ;; The first field changed to a derivation (from the file
-                          ;; name) in 5cf4b26d52bcea382d98fb4becce89be9ee37b55
-                          (derivation-file-name d))
-                         ((? string? s)
-                          s))))
-                  (map (lambda (sub-derivation)
-                         (string-append "("
-                                        (number->string derivation-id)
-                                        ", '" path
-                                        "', '" sub-derivation "')"))
-                       sub-derivations))))
-             (derivation-inputs derivation)))
-          derivation-ids
-          derivations)))
-
-    (unless (null? data)
-      (for-each
-       (lambda (chunk)
-         (exec-query
-          conn
-          (string-append
-           "
+  (define (process-chunk derivation-ids derivations)
+    (let ((query-parts
+           (append-map!
+            (lambda (derivation-id derivation)
+              (append-map!
+               (match-lambda
+                 (($ <derivation-input> derivation-or-path sub-derivations)
+                  (let ((path
+                         (match derivation-or-path
+                           ((? derivation? d)
+                            ;; The first field changed to a derivation (from the file
+                            ;; name) in 5cf4b26d52bcea382d98fb4becce89be9ee37b55
+                            (derivation-file-name d))
+                           ((? string? s)
+                            s))))
+                    (map (lambda (sub-derivation)
+                           (string-append "("
+                                          (number->string derivation-id)
+                                          ", '" path
+                                          "', '" sub-derivation "')"))
+                         sub-derivations))))
+               (derivation-inputs derivation)))
+            derivation-ids
+            derivations)))
+
+      (unless (null? query-parts)
+        (exec-query
+         conn
+         (string-append
+          "
 INSERT INTO derivation_inputs (derivation_id, derivation_output_id)
 SELECT vals.derivation_id, derivation_outputs.id
 FROM (VALUES "
-           (string-join chunk ", ")
-           ") AS vals (derivation_id, file_name, output_name)
+          (string-join query-parts ", ")
+          ") AS vals (derivation_id, file_name, output_name)
 INNER JOIN derivations
   ON derivations.file_name = vals.file_name
 INNER JOIN derivation_outputs
   ON derivation_outputs.derivation_id = derivations.id
- AND vals.output_name = derivation_outputs.name")))
-       (chunk! data 1000)))))
+ AND vals.output_name = derivation_outputs.name")))))
+
+  (chunk-map! process-chunk
+              1000
+              (list-copy derivation-ids)
+              (list-copy derivations)))
 
 (define (select-from-derivation-source-files store-paths)
   (string-append
author	Christopher Baines <mail@cbaines.net>	2022-01-12 18:18:15 +0000
committer	Christopher Baines <mail@cbaines.net>	2022-01-12 18:18:15 +0000
commit	21cb33a859a25ac6ba82f32e014ea642e2e62afc (patch)
tree	e068f8f4a7f8aa47016f6ad2a73ff126f04f11ff /guix-data-service/model
parent	6102553d947c8ae2f321091916986b091b94cee0 (diff)
download	data-service-21cb33a859a25ac6ba82f32e014ea642e2e62afc.tar data-service-21cb33a859a25ac6ba82f32e014ea642e2e62afc.tar.gz