aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2020-12-10 11:21:14 +0100
committerLudovic Courtès <ludo@gnu.org>2020-12-15 17:32:09 +0100
commit2718c29c3fb9f9de2ec897248ad49ae11ca39b7a (patch)
tree6698852d62943afa23f01f8fbeec8033db9c7ef7
parented7d02f7c198970ce3fe94bcee47592963326446 (diff)
downloadguix-2718c29c3fb9f9de2ec897248ad49ae11ca39b7a.tar
guix-2718c29c3fb9f9de2ec897248ad49ae11ca39b7a.tar.gz
nar: Deduplicate files right as they are restored.
This avoids having to traverse and re-read the files that we have just restored, thereby reducing I/O. * guix/serialization.scm (dump-file): New procedure. (restore-file): Add #:dump-file parameter and honor it. * guix/store/deduplication.scm (tee, dump-file/deduplicate): New procedures. * guix/nar.scm (restore-one-item): Pass #:dump-file to 'restore-file'. (finalize-store-file): Pass #:deduplicate? #f to 'register-items'. * tests/nar.scm <top level>: Call 'setenv' to set "NIX_STORE".
-rw-r--r--guix/nar.scm12
-rw-r--r--guix/serialization.scm27
-rw-r--r--guix/store/deduplication.scm57
-rw-r--r--tests/nar.scm3
4 files changed, 85 insertions, 14 deletions
diff --git a/guix/nar.scm b/guix/nar.scm
index edfcc9aab5..ba035ca6dc 100644
--- a/guix/nar.scm
+++ b/guix/nar.scm
@@ -27,6 +27,7 @@
;; (guix store) since this is "daemon-side" code.
#:use-module (guix store)
#:use-module (guix store database)
+ #:use-module ((guix store deduplication) #:select (dump-file/deduplicate))
#:use-module ((guix build store-copy) #:select (store-info))
#:use-module (guix i18n)
@@ -114,12 +115,12 @@ held."
;; Install the new TARGET.
(rename-file source target)
- ;; Register TARGET. As a side effect, run a deduplication pass.
- ;; Timestamps and permissions are already correct thanks to
- ;; 'restore-file'.
+ ;; Register TARGET. The 'restore-file' call took care of
+ ;; deduplication, timestamps, and permissions.
(register-items db
(list (store-info target deriver references))
- #:reset-timestamps? #f))
+ #:reset-timestamps? #f
+ #:deduplicate? #f))
(when lock?
(delete-file (string-append target ".lock"))
@@ -212,7 +213,8 @@ s-expression"))
(let-values (((port get-hash)
(open-sha256-input-port port)))
(with-temporary-store-file temp
- (restore-file port temp)
+ (restore-file port temp
+ #:dump-file dump-file/deduplicate)
(let ((magic (read-int port)))
(unless (= magic %export-magic)
diff --git a/guix/serialization.scm b/guix/serialization.scm
index 677ca60b66..9e2dce8bb0 100644
--- a/guix/serialization.scm
+++ b/guix/serialization.scm
@@ -457,9 +457,22 @@ depends on TYPE."
(&message (message "unsupported nar entry type"))
(&nar-read-error (port port) (file file) (token x)))))))))
-(define (restore-file port file)
+(define (dump-file file input size type)
+ "Dump SIZE bytes from INPUT to FILE."
+ (call-with-output-file file
+ (lambda (output)
+ (dump input output size))))
+
+(define* (restore-file port file
+ #:key (dump-file dump-file))
"Read a file (possibly a directory structure) in Nar format from PORT.
-Restore it as FILE with canonical permissions and timestamps."
+Restore it as FILE with canonical permissions and timestamps. To write a
+regular or executable file, call:
+
+ (DUMP-FILE FILE INPUT SIZE TYPE)
+
+The default is to dump SIZE bytes from INPUT to FILE, but callers can provide
+a custom procedure, for instance to deduplicate FILE on the fly."
(fold-archive (lambda (file type content result)
(match type
('directory
@@ -473,12 +486,10 @@ Restore it as FILE with canonical permissions and timestamps."
((or 'regular 'executable)
(match content
((input . size)
- (call-with-output-file file
- (lambda (output)
- (dump input output size)
- (chmod output (if (eq? type 'executable)
- #o555
- #o444))))
+ (dump-file file input size type)
+ (chmod file (if (eq? type 'executable)
+ #o555
+ #o444))
(utime file 1 1 0 0))))))
#t
port
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
index 0655ceb890..b4d37d4525 100644
--- a/guix/store/deduplication.scm
+++ b/guix/store/deduplication.scm
@@ -26,12 +26,15 @@
#:use-module (guix build syscalls)
#:use-module (guix base32)
#:use-module (srfi srfi-11)
+ #:use-module (srfi srfi-34)
+ #:use-module (srfi srfi-35)
#:use-module (rnrs io ports)
#:use-module (ice-9 ftw)
#:use-module (ice-9 match)
#:use-module (guix serialization)
#:export (nar-sha256
- deduplicate))
+ deduplicate
+ dump-file/deduplicate))
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
;; 'port-position' throws to 'out-of-range' when the offset is great than or
@@ -201,3 +204,55 @@ under STORE."
;; that's OK: we just can't deduplicate it more.
#f)
(else (apply throw args)))))))))))
+
+(define (tee input len output)
+ "Return a port that reads up to LEN bytes from INPUT and writes them to
+OUTPUT as it goes."
+ (define bytes-read 0)
+
+ (define (fail)
+ ;; Reached EOF before we had read LEN bytes from INPUT.
+ (raise (condition
+ (&nar-error (port input)
+ (file (port-filename output))))))
+
+ (define (read! bv start count)
+ ;; Read at most LEN bytes in total.
+ (let ((count (min count (- len bytes-read))))
+ (let loop ((ret (get-bytevector-n! input bv start count)))
+ (cond ((eof-object? ret)
+ (if (= bytes-read len)
+ 0 ; EOF
+ (fail)))
+ ((and (zero? ret) (> count 0))
+ ;; Do not return zero since zero means EOF, so try again.
+ (loop (get-bytevector-n! input bv start count)))
+ (else
+ (put-bytevector output bv start ret)
+ (set! bytes-read (+ bytes-read ret))
+ ret)))))
+
+ (make-custom-binary-input-port "tee input port" read! #f #f #f))
+
+(define* (dump-file/deduplicate file input size type
+ #:key (store (%store-directory)))
+ "Write SIZE bytes read from INPUT to FILE. TYPE is a symbol, either
+'regular or 'executable.
+
+This procedure is suitable as a #:dump-file argument to 'restore-file'. When
+used that way, it deduplicates files on the fly as they are restored, thereby
+removing the need to a deduplication pass that would re-read all the files
+down the road."
+ (define hash
+ (call-with-output-file file
+ (lambda (output)
+ (let-values (((hash-port get-hash)
+ (open-hash-port (hash-algorithm sha256))))
+ (write-file-tree file hash-port
+ #:file-type+size (lambda (_) (values type size))
+ #:file-port
+ (const (tee input size output)))
+ (close-port hash-port)
+ (get-hash)))))
+
+ (deduplicate file hash #:store store))
diff --git a/tests/nar.scm b/tests/nar.scm
index 59616659c8..ba4881caaa 100644
--- a/tests/nar.scm
+++ b/tests/nar.scm
@@ -452,6 +452,9 @@
(false-if-exception (rm-rf %test-dir))
(setlocale LC_ALL locale)))))
+;; XXX: Tell the 'deduplicate' procedure what store we're actually using.
+(setenv "NIX_STORE" (%store-prefix))
+
(test-assert "restore-file-set (signed, valid)"
(with-store store
(let* ((texts (unfold (cut >= <> 10)