From 00753f7038234a0f5a79be3ec9ab949840a18743 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Mon, 17 Apr 2017 23:13:40 +0200 Subject: publish: Add '--cache' and '--workers'. Fixes . Reported by . These options allow nars to be "baked" off-line and cached instead of being compressed on the fly. As a side-effect, this allows us to provide a 'Content-Length' header for nars. * guix/scripts/publish.scm (show-help, %options): Add '--cache' and '--workers'. (%default-options): Add 'workers'. (nar-cache-file, narinfo-cache-file, run-single-baker): New procedures. (single-baker): New macro. (render-narinfo/cached, bake-narinfo+nar) (render-nar/cached): New procedures. (make-request-handler): Add #:cache and #:pool parameters and honor them. (run-publish-server): Likewise. (guix-publish): Honor '--cache' and '--workers'. * tests/publish.scm ("with cache"): New test. * doc/guix.texi (Invoking guix publish): Document it. --- doc/guix.texi | 46 ++++++++++- guix/scripts/publish.scm | 197 +++++++++++++++++++++++++++++++++++++++++++---- tests/publish.scm | 54 +++++++++++++ 3 files changed, 280 insertions(+), 17 deletions(-) diff --git a/doc/guix.texi b/doc/guix.texi index fd3483ee5d..bbb2ba732d 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -6522,6 +6522,13 @@ archive}), the daemon may download substitutes from it: guix-daemon --substitute-urls=http://example.org:8080 @end example +By default, @command{guix publish} compresses archives on the fly as it +serves them. This ``on-the-fly'' mode is convenient in that it requires +no setup and is immediately available. However, when serving lots of +clients, we recommend using the @option{--cache} option, which enables +caching of the archives before they are sent to clients---see below for +details. + As a bonus, @command{guix publish} also serves as a content-addressed mirror for source files referenced in @code{origin} records (@pxref{origin Reference}). For instance, assuming @command{guix @@ -6559,10 +6566,43 @@ disable compression. The range 1 to 9 corresponds to different gzip compression levels: 1 is the fastest, and 9 is the best (CPU-intensive). The default is 3. -Compression occurs on the fly and the compressed streams are not +Unless @option{--cache} is used, compression occurs on the fly and +the compressed streams are not cached. Thus, to reduce load on the machine that runs @command{guix -publish}, it may be a good idea to choose a low compression level, or to -run @command{guix publish} behind a caching proxy. +publish}, it may be a good idea to choose a low compression level, to +run @command{guix publish} behind a caching proxy, or to use +@option{--cache}. Using @option{--cache} has the advantage that it +allows @command{guix publish} to add @code{Content-Length} HTTP header +to its responses. + +@item --cache=@var{directory} +@itemx -c @var{directory} +Cache archives and meta-data (@code{.narinfo} URLs) to @var{directory} +and only serve archives that are in cache. + +When this option is omitted, archives and meta-data are created +on-the-fly. This can reduce the available bandwidth, especially when +compression is enabled, since this may become CPU-bound. Another +drawback of the default mode is that the length of archives is not known +in advance, so @command{guix publish} does not add a +@code{Content-Length} HTTP header to its responses, which in turn +prevents clients from knowing the amount of data being downloaded. + +Conversely, when @option{--cache} is used, the first request for a store +item (@i{via} a @code{.narinfo} URL) returns 404 and triggers a +background process to @dfn{bake} the archive---computing its +@code{.narinfo} and compressing the archive, if needed. Once the +archive is cached in @var{directory}, subsequent requests succeed and +are served directly from the cache, which guarantees that clients get +the best possible bandwidth. + +The ``baking'' process is performed by worker threads. By default, one +thread per CPU core is created, but this can be customized. See +@option{--workers} below. + +@item --workers=@var{N} +When @option{--cache} is used, request the allocation of @var{N} worker +threads to ``bake'' archives. @item --ttl=@var{ttl} Produce @code{Cache-Control} HTTP headers that advertise a time-to-live diff --git a/guix/scripts/publish.scm b/guix/scripts/publish.scm index f54757b4c9..70d914d60c 100644 --- a/guix/scripts/publish.scm +++ b/guix/scripts/publish.scm @@ -24,6 +24,7 @@ (define-module (guix scripts publish) #:use-module (ice-9 match) #:use-module (ice-9 regex) #:use-module (ice-9 rdelim) + #:use-module (ice-9 threads) #:use-module (rnrs bytevectors) #:use-module (srfi srfi-1) #:use-module (srfi srfi-2) @@ -45,13 +46,15 @@ (define-module (guix scripts publish) #:use-module (guix hash) #:use-module (guix pki) #:use-module (guix pk-crypto) + #:use-module (guix workers) #:use-module (guix store) #:use-module ((guix serialization) #:select (write-file)) #:use-module (guix zlib) #:use-module (guix ui) #:use-module (guix scripts) - #:use-module ((guix utils) #:select (compressed-file?)) - #:use-module ((guix build utils) #:select (dump-port)) + #:use-module ((guix utils) + #:select (with-atomic-file-output compressed-file?)) + #:use-module ((guix build utils) #:select (dump-port mkdir-p)) #:export (%public-key %private-key @@ -69,6 +72,10 @@ (define (show-help) (display (_ " -C, --compression[=LEVEL] compress archives at LEVEL")) + (display (_ " + -c, --cache=DIRECTORY cache published items to DIRECTORY")) + (display (_ " + --workers=N use N workers to bake items")) (display (_ " --ttl=TTL announce narinfos can be cached for TTL seconds")) (display (_ " @@ -154,6 +161,13 @@ (define %options (warning (_ "zlib support is missing; \ compression disabled~%")) result)))))) + (option '(#\c "cache") #t #f + (lambda (opt name arg result) + (alist-cons 'cache arg result))) + (option '("workers") #t #f + (lambda (opt name arg result) + (alist-cons 'workers (string->number* arg) + result))) (option '("ttl") #t #f (lambda (opt name arg result) (let ((duration (string->duration arg))) @@ -190,6 +204,9 @@ (define %default-options %default-gzip-compression %no-compression)) + ;; Default number of workers when caching is enabled. + (workers . ,(current-processor-count)) + (address . ,(make-socket-address AF_INET INADDR_ANY 0)) (repl . #f))) @@ -308,6 +325,121 @@ (define* (render-narinfo store request hash #:compression compression) <>))))) +(define* (nar-cache-file directory item + #:key (compression %no-compression)) + (string-append directory "/" + (symbol->string (compression-type compression)) + "/" (basename item) ".nar")) + +(define* (narinfo-cache-file directory item + #:key (compression %no-compression)) + (string-append directory "/" + (symbol->string (compression-type compression)) + "/" (basename item) + ".narinfo")) + +(define run-single-baker + (let ((baking (make-weak-value-hash-table)) + (mutex (make-mutex))) + (lambda (item thunk) + "Run THUNK, which is supposed to bake ITEM, but make sure only one +thread is baking ITEM at a given time." + (define selected? + (with-mutex mutex + (and (not (hash-ref baking item)) + (begin + (hash-set! baking item (current-thread)) + #t)))) + + (when selected? + (dynamic-wind + (const #t) + thunk + (lambda () + (with-mutex mutex + (hash-remove! baking item)))))))) + +(define-syntax-rule (single-baker item exp ...) + "Bake ITEM by evaluating EXP, but make sure there's only one baker for ITEM +at a time." + (run-single-baker item (lambda () exp ...))) + + +(define* (render-narinfo/cached store request hash + #:key ttl (compression %no-compression) + (nar-path "nar") + cache pool) + "Respond to the narinfo request for REQUEST. If the narinfo is available in +CACHE, then send it; otherwise, return 404 and \"bake\" that nar and narinfo +requested using POOL." + (let* ((item (hash-part->path store hash)) + (compression (actual-compression item compression)) + (cached (and (not (string-null? item)) + (narinfo-cache-file cache item + #:compression compression)))) + (cond ((string-null? item) + (not-found request)) + ((file-exists? cached) + ;; Narinfo is in cache, send it. + (values `((content-type . (application/x-nix-narinfo)) + ,@(if ttl + `((cache-control (max-age . ,ttl))) + '())) + (lambda (port) + (display (call-with-input-file cached + read-string) + port)))) + ((valid-path? store item) + ;; Nothing in cache: bake the narinfo and nar in the background and + ;; return 404. + (eventually pool + (single-baker item + ;; (format #t "baking ~s~%" item) + (bake-narinfo+nar cache item + #:ttl ttl + #:compression compression + #:nar-path nar-path))) + (not-found request)) + (else + (not-found request))))) + +(define* (bake-narinfo+nar cache item + #:key ttl (compression %no-compression) + (nar-path "/nar")) + "Write the narinfo and nar for ITEM to CACHE." + (let* ((compression (actual-compression item compression)) + (nar (nar-cache-file cache item + #:compression compression)) + (narinfo (narinfo-cache-file cache item + #:compression compression))) + + (mkdir-p (dirname nar)) + (match (compression-type compression) + ('gzip + ;; Note: the file port gets closed along with the gzip port. + (call-with-gzip-output-port (open-output-file (string-append nar ".tmp")) + (lambda (port) + (write-file item port)) + #:level (compression-level compression)) + (rename-file (string-append nar ".tmp") nar)) + ('none + ;; When compression is disabled, we retrieve files directly from the + ;; store; no need to cache them. + #t)) + + (mkdir-p (dirname narinfo)) + (with-atomic-file-output narinfo + (lambda (port) + ;; Open a new connection to the store. We cannot reuse the main + ;; thread's connection to the store since we would end up sending + ;; stuff concurrently on the same channel. + (with-store store + (display (narinfo-string store item + (%private-key) + #:nar-path nar-path + #:compression compression) + port)))))) + ;; XXX: Declare the 'Guix-Compression' HTTP header, which is in fact for ;; internal consumption: it allows us to pass the compression info to ;; 'http-write', as part of the workaround to . @@ -339,6 +471,21 @@ (define* (render-nar store request store-item store-path) (not-found request)))) +(define* (render-nar/cached store cache request store-item + #:key (compression %no-compression)) + "Respond to REQUEST with a nar for STORE-ITEM. If the nar is in CACHE, +return it; otherwise, return 404." + (let ((cached (nar-cache-file cache store-item + #:compression compression))) + (if (file-exists? cached) + (values `((content-type . (application/octet-stream + (charset . "ISO-8859-1")))) + ;; XXX: We're not returning the actual contents, deferring + ;; instead to 'http-write'. This is a hack to work around + ;; . + cached) + (not-found request)))) + (define (render-content-addressed-file store request name algo hash) "Return the content of the result of the fixed-output derivation NAME that @@ -495,6 +642,7 @@ (define-server-impl concurrent-http-server (define* (make-request-handler store #:key + cache pool narinfo-ttl (nar-path "nar") (compression %no-compression)) @@ -515,10 +663,17 @@ (define nar-path? (((= extract-narinfo-hash (? string? hash))) ;; TODO: Register roots for HASH that will somehow remain for ;; NARINFO-TTL. - (render-narinfo store request hash - #:ttl narinfo-ttl - #:nar-path nar-path - #:compression compression)) + (if cache + (render-narinfo/cached store request hash + #:cache cache + #:pool pool + #:ttl narinfo-ttl + #:nar-path nar-path + #:compression compression) + (render-narinfo store request hash + #:ttl narinfo-ttl + #:nar-path nar-path + #:compression compression))) ;; /nar/file/NAME/sha256/HASH (("file" name "sha256" hash) (guard (c ((invalid-base32-character? c) @@ -534,13 +689,16 @@ (define nar-path? ;; /nar/gzip/ ((components ... "gzip" store-item) (if (and (nar-path? components) (zlib-available?)) - (render-nar store request store-item - #:compression - (match compression - (($ 'gzip) - compression) - (_ - %default-gzip-compression))) + (let ((compression (match compression + (($ 'gzip) + compression) + (_ + %default-gzip-compression)))) + (if cache + (render-nar/cached store cache request store-item + #:compression compression) + (render-nar store request store-item + #:compression compression))) (not-found request))) ;; /nar/ @@ -555,8 +713,11 @@ (define nar-path? (define* (run-publish-server socket store #:key (compression %no-compression) - (nar-path "nar") narinfo-ttl) + (nar-path "nar") narinfo-ttl + cache pool) (run-server (make-request-handler store + #:cache cache + #:pool pool #:nar-path nar-path #:narinfo-ttl narinfo-ttl #:compression compression) @@ -606,6 +767,8 @@ (define (guix-publish . args) (socket (open-server-socket address)) (nar-path (assoc-ref opts 'nar-path)) (repl-port (assoc-ref opts 'repl)) + (cache (assoc-ref opts 'cache)) + (workers (assoc-ref opts 'workers)) ;; Read the key right away so that (1) we fail early on if we can't ;; access them, and (2) we can then drop privileges. @@ -631,6 +794,12 @@ (define (guix-publish . args) (repl:spawn-server (repl:make-tcp-server-socket #:port repl-port))) (with-store store (run-publish-server socket store + #:cache cache + #:pool (and cache (make-pool workers)) #:nar-path nar-path #:compression compression #:narinfo-ttl ttl)))))) + +;;; Local Variables: +;;; eval: (put 'single-baker 'scheme-indent-function 1) +;;; End: diff --git a/tests/publish.scm b/tests/publish.scm index ea0f4a3477..233b71ce93 100644 --- a/tests/publish.scm +++ b/tests/publish.scm @@ -314,4 +314,58 @@ (define (wait-until-ready port) (call-with-input-string "" port-sha256)))))) (response-code (http-get uri)))) +(unless (zlib-available?) + (test-skip 1)) +(test-equal "with cache" + (list #t + `(("StorePath" . ,%item) + ("URL" . ,(string-append "nar/gzip/" (basename %item))) + ("Compression" . "gzip")) + 200 ;nar/gzip/… + #t ;Content-Length + 200) ;nar/… + (call-with-temporary-directory + (lambda (cache) + (define (wait-for-file file) + (let loop ((i 20)) + (or (file-exists? file) + (begin + (pk 'wait-for-file file) + (sleep 1) + (loop (- i 1)))))) + + (let ((thread (with-separate-output-ports + (call-with-new-thread + (lambda () + (guix-publish "--port=6797" "-C2" + (string-append "--cache=" cache))))))) + (wait-until-ready 6797) + (let* ((base "http://localhost:6797/") + (part (store-path-hash-part %item)) + (url (string-append base part ".narinfo")) + (nar-url (string-append base "/nar/gzip/" (basename %item))) + (cached (string-append cache "/gzip/" (basename %item) + ".narinfo")) + (nar (string-append cache "/gzip/" + (basename %item) ".nar")) + (response (http-get url))) + (and (= 404 (response-code response)) + (wait-for-file cached) + (let ((body (http-get-port url)) + (compressed (http-get nar-url)) + (uncompressed (http-get (string-append base "nar/" + (basename %item))))) + (list (file-exists? nar) + (filter (lambda (item) + (match item + (("Compression" . _) #t) + (("StorePath" . _) #t) + (("URL" . _) #t) + (_ #f))) + (recutils->alist body)) + (response-code compressed) + (= (response-content-length compressed) + (stat:size (stat nar))) + (response-code uncompressed))))))))) + (test-end "publish") -- cgit v1.2.3