aboutsummaryrefslogtreecommitdiff
path: root/guix/store/deduplication.scm
diff options
context:
space:
mode:
Diffstat (limited to 'guix/store/deduplication.scm')
-rw-r--r--guix/store/deduplication.scm148
1 files changed, 148 insertions, 0 deletions
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
new file mode 100644
index 0000000000..4b4ac01f64
--- /dev/null
+++ b/guix/store/deduplication.scm
@@ -0,0 +1,148 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
+;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+;;; This module houses the processing applied to files when they arrive at the
+;;; store---resetting timestamps, deduplicating, etc.
+
+(define-module (guix store deduplication)
+ #:use-module (guix hash)
+ #:use-module (guix build utils)
+ #:use-module (guix base16)
+ #:use-module (srfi srfi-11)
+ #:use-module (rnrs io ports)
+ #:use-module (ice-9 ftw)
+ #:use-module (guix serialization)
+ #:export (nar-sha256
+ deduplicate))
+
+;; Would it be better to just make WRITE-FILE give size as well? I question
+;; the general utility of this approach.
+(define (counting-wrapper-port output-port)
+  "Return a binary output port that forwards every write to OUTPUT-PORT while
+keeping a running total of the bytes written through it.  That total is what
+the returned port reports as its position, which is useful when OUTPUT-PORT is
+a custom port that does not implement GET-POSITION itself.  Closing the
+returned port closes OUTPUT-PORT as well."
+  (define written 0)
+
+  (define (write! bv start len)
+    ;; Account for the bytes, pass them through untouched, and report that
+    ;; all LEN bytes were consumed.
+    (set! written (+ written len))
+    (put-bytevector output-port bv start len)
+    len)
+
+  (define (get-position)
+    written)
+
+  (define (close)
+    (close-port output-port))
+
+  ;; The #f argument means the port has no SET-POSITION! hook.
+  (make-custom-binary-output-port "counting-wrapper"
+                                  write! get-position #f close))
+
+(define (nar-sha256 file)
+  "Serialize FILE in nar form and return two values: the sha256 hash of that
+serialization, and the number of bytes it occupies."
+  (call-with-values open-sha256-port
+    (lambda (sha-port sha-result)
+      (define counter (counting-wrapper-port sha-port))
+
+      (write-file file counter)
+      ;; Flush the counting wrapper first, then the hash port behind it, so
+      ;; that every byte has been seen before the results are read.
+      (force-output counter)
+      (force-output sha-port)
+      (let* ((digest   (sha-result))
+             (nar-size (port-position counter)))
+        ;; Closing the wrapper also closes SHA-PORT.
+        (close-port counter)
+        (values digest nar-size)))))
+
+(define (tempname-in directory)
+  "Return a file name under DIRECTORY, of the form
+DIRECTORY/.tmp-link-PID-HEX, that did not exist at the time it was checked.
+Nothing is created, so another process may still claim the name before the
+caller uses it."
+  (let ((prefix (string-append directory "/.tmp-link-"
+                               (number->string (getpid)) "-")))
+    (define (fresh-name)
+      ;; PREFIX followed by a random hexadecimal suffix.
+      (string-append prefix
+                     (number->string (random most-positive-fixnum) 16)))
+
+    ;; Keep drawing candidates until one does not name an existing file.
+    (let loop ((name (fresh-name)))
+      (if (file-exists? name)
+          (loop (fresh-name))
+          name))))
+
+(define* (get-temp-link target #:optional (link-prefix (dirname target)))
+  "Like mkstemp!, but instead of creating a new file and giving you the name,
+it creates a new hardlink to TARGET and gives you the name.  Since
+cross-filesystem hardlinks don't work, the temp link must be created on the
+same filesystem - where in that filesystem it is can be controlled by
+LINK-PREFIX, the directory in which the link is created.
+
+Return the name of the new link.  If the chosen name is taken by the time the
+link is made (EEXIST), retry with a fresh name; any other 'system-error' is
+re-thrown to the caller."
+  (let try ((tempname (tempname-in link-prefix)))
+    (catch 'system-error
+      (lambda ()
+        (link target tempname)
+        tempname)
+      ;; The handler is called with the key followed by the throw arguments,
+      ;; so it must accept a rest list; that whole list is what
+      ;; SYSTEM-ERROR-ERRNO takes and what has to be re-thrown.
+      (lambda args
+        (if (= (system-error-errno args) EEXIST)
+            (try (tempname-in link-prefix))
+            (apply throw args))))))
+
+;; There are 3 main kinds of errors we can get from hardlinking: "Too many
+;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
+;; "can't fit more stuff in this directory" (ENOSPC).
+
+(define (replace-with-link target to-replace)
+  "Atomically replace the file TO-REPLACE with a link to TARGET.  This is done
+by creating a temporary hard link to TARGET in TO-REPLACE's directory and
+renaming it over TO-REPLACE.  Note: TARGET and TO-REPLACE must be on the same
+file system.
+
+Any 'system-error' raised while creating the temporary link or renaming it is
+propagated to the caller."
+  (let ((temp-link (get-temp-link target (dirname to-replace))))
+    (catch 'system-error
+      (lambda ()
+        (rename-file temp-link to-replace))
+      (lambda args
+        ;; The rename failed, so TEMP-LINK is still around: remove it on a
+        ;; best-effort basis rather than leaving a stray '.tmp-link-' entry
+        ;; behind, then let the caller see the original error.
+        (false-if-exception (delete-file temp-link))
+        (apply throw args)))))
+
+(define-syntax-rule (false-if-system-error (errors ...) exp ...)
+  "Evaluate EXP... and return the value of the last expression.  If a
+'system-error' exception is thrown whose errno is one of ERRORS..., return #f
+instead; a 'system-error' with any other errno is re-thrown unchanged."
+  (catch 'system-error
+    (lambda ()
+      exp ...)
+    (lambda key+args
+      ;; KEY+ARGS is the exception key followed by the throw arguments.
+      (if (not (member (system-error-errno key+args) (list errors ...)))
+          (apply throw key+args)
+          #f))))
+
+(define* (deduplicate path hash #:key (store %store-directory))
+  "Check if a store item with sha256 hash HASH already exists.  If so,
+replace PATH with a hardlink to the already-existing one.  If not, register
+PATH so that future duplicates can hardlink to it.  PATH is assumed to be
+under STORE.
+
+Known items are recorded as hard links named after the base16 encoding of
+their hash in STORE/.links, which is created if needed.  When PATH is a
+directory, each of its entries is deduplicated recursively, using the nar
+hash of the entry.  EMLINK errors while replacing PATH, and ENOSPC errors
+while registering it, are ignored: PATH is then simply left as is.  The
+return value is unspecified."
+  (let* (;; NOTE(review): among the modules imported here, %store-directory
+         ;; appears to be a procedure (from (guix build utils)) rather than a
+         ;; string -- confirm.  Accept both so that the default value works
+         ;; with STRING-APPEND below.
+         (store           (if (procedure? store) (store) store))
+         (links-directory (string-append store "/.links"))
+         (link-file       (string-append links-directory "/"
+                                         (bytevector->base16-string hash))))
+    (mkdir-p links-directory)
+    (if (file-is-directory? path)
+        ;; Can't hardlink directories, so hardlink their atoms.
+        (for-each (lambda (file)
+                    (unless (member file '("." ".."))
+                      ;; SCANDIR yields bare entry names; qualify them with
+                      ;; PATH before hashing and recursing.
+                      (let ((file (string-append path "/" file)))
+                        (deduplicate file (nar-sha256 file)
+                                     #:store store))))
+                  (scandir path))
+        (if (file-exists? link-file)
+            (false-if-system-error (EMLINK)
+              (replace-with-link link-file path))
+            (catch 'system-error
+              (lambda ()
+                (link path link-file))
+              (lambda args
+                (let ((errno (system-error-errno args)))
+                  (cond ((= errno EEXIST)
+                         ;; Someone else put an entry for PATH in
+                         ;; LINKS-DIRECTORY before we could.  Let's use it:
+                         ;; PATH becomes a link to LINK-FILE, not the other
+                         ;; way around.
+                         (false-if-system-error (EMLINK)
+                           (replace-with-link link-file path)))
+                        ((= errno ENOSPC)
+                         ;; There's not enough room in the directory index for
+                         ;; more entries in .links, but that's fine: we can
+                         ;; just stop.
+                         #f)
+                        (else (apply throw args))))))))))