From 5a7cb59648d102168bd4ecd16f36b69e0f594be1 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 19 Apr 2024 22:00:44 +0200 Subject: deduplication: Detect holes and create sparse files. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reduces disk usage of sparse files that are substituted such as Guile object files (ELF files). As of Guile 3.0.9, .go files are sparse due to ELF sections being aligned on 64 KiB boundaries. This reduces disk usage reported by “du -sh” by 9% for the ‘guix’ package, by 23% for ‘guile’, and by 35% for ‘guile-git’. * guix/store/deduplication.scm (hole-size, find-holes): New procedures. (tee)[seekable?]: New variable. [read!]: Add case when SEEKABLE? is true. * tests/store-deduplication.scm (cartesian-product): New procedure. ("copy-file/deduplicate, sparse files (holes: ~a/~a/~a)"): New test set. Change-Id: Iad2ab7830dcb1220e2026f4a127a6c718afa8964 --- tests/store-deduplication.scm | 58 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm index f1845035d8..f116ff9834 100644 --- a/tests/store-deduplication.scm +++ b/tests/store-deduplication.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2018, 2020-2022 Ludovic Courtès +;;; Copyright © 2018, 2020-2022, 2024 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -24,10 +24,27 @@ (define-module (test-store-deduplication) #:use-module (guix build utils) #:use-module (rnrs bytevectors) #:use-module (ice-9 binary-ports) + #:use-module (ice-9 match) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:use-module (srfi srfi-64)) +(define (cartesian-product . lst) + "Return the Cartesian product of all the given lists." + (match lst + ((head) + (map list head)) + ((head . rest) + (let ((others (apply cartesian-product rest))) + (append-map (lambda (init) + (map (lambda (lst) + (cons init lst)) + others)) + head))) + (() + '()))) + + (test-begin "store-deduplication") (test-equal "deduplicate, below %deduplication-minimum-size" @@ -166,4 +183,43 @@ (define-module (test-store-deduplication) (cut string-append store <>)) '("/a" "/b" "/c")))))))) +(for-each (match-lambda + ((initial-gap middle-gap final-gap) + (test-assert + (format #f "copy-file/deduplicate, sparse files (holes: ~a/~a/~a)" + initial-gap middle-gap final-gap) + (call-with-temporary-directory + (lambda (store) + (let ((source (string-append store "/source"))) + (call-with-output-file source + (lambda (port) + (seek port initial-gap SEEK_CUR) + (display "hi!" port) + (seek port middle-gap SEEK_CUR) + (display "bye." port) + (when (> final-gap 0) + (seek port (- final-gap 1) SEEK_CUR) + (put-u8 port 0)))) + + (for-each (lambda (target) + (copy-file/deduplicate source + (string-append store target) + #:store store)) + '("/a" "/b" "/c")) + (system* "du" "-h" source) + (system* "du" "-h" "--apparent-size" source) + (system* "du" "-h" (string-append store "/a")) + (system* "du" "-h" "--apparent-size" (string-append store "/a")) + (and (directory-exists? (string-append store "/.links")) + (file=? source (string-append store "/a")) + (apply = (map (compose stat:ino stat + (cut string-append store <>)) + '("/a" "/b" "/c"))) + (let ((st (pk 'S (stat (string-append store "/a"))))) + (<= (* 512 (stat:blocks st)) + (stat:size st)))))))))) + (cartesian-product '(0 3333 8192) + '(8192 9999 16384 22222) + '(0 8192))) + (test-end "store-deduplication") -- cgit v1.2.3