author     Ludovic Courtès <ludo@gnu.org>             2020-10-24 16:31:18 +0200
committer  Ludovic Courtès <ludo@gnu.org>             2020-10-28 16:24:47 +0100
commit     ecaa102a58ad3ab0b42e04a3d10d7c761c05ec98 (patch)
tree       e89b3791dec3f3fada0db1768b109414b4b56cf1
parent     6d1fd37182f17e4178e2950a22a5aed0ba135587 (diff)
publish: Add '--cache-bypass-threshold'.
* guix/scripts/publish.scm (show-help, %options): Add
'--cache-bypass-threshold'.
(low-compression): New procedure.
(cache-bypass-threshold): New parameter.
(bypass-cache?): New procedure.
(render-narinfo/cached): Call 'render-narinfo' when 'bypass-cache?'
returns true.
(render-nar/cached): Call 'render-nar' when 'bypass-cache?' returns
true.
(guix-publish): Parameterize 'cache-bypass-threshold'.
* tests/publish.scm ("with cache", "with cache, lzip + gzip")
("with cache, uncompressed"): Pass '--cache-bypass-threshold=0'.
("with cache, vanishing item"): Expect 200 for RESPONSE.
("with cache, cache bypass"): New test.
-rw-r--r--   doc/guix.texi            | 24
-rw-r--r--   guix/scripts/publish.scm | 87
-rw-r--r--   tests/publish.scm        | 43
3 files changed, 131 insertions, 23 deletions
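
The heart of the change is a single size comparison: a store item whose nar size falls below a configurable threshold may be served before its archive has been baked into the cache. The snippet below is a minimal, self-contained Guile sketch of that decision, not code from the patch: 'bypass-cache?*' and its NAR-SIZE argument are illustrative stand-ins for the real 'bypass-cache?', which obtains the size from the daemon via 'query-path-info'; the 10 MiB default mirrors the (* 10 (expt 2 20)) figure used in guix/scripts/publish.scm.

;; Minimal sketch of the cache-bypass decision (illustration only).
;; NAR-SIZE stands in for (path-info-nar-size (query-path-info store item)),
;; which is what the real 'bypass-cache?' asks the daemon for.

(define cache-bypass-threshold
  ;; Same default as the patch: 10 MiB.
  (make-parameter (* 10 (expt 2 20))))

(define (bypass-cache?* nar-size)
  "Return #t when an item of NAR-SIZE bytes may be served before it is cached."
  (< nar-size (cache-bypass-threshold)))

(bypass-cache?* (* 5 (expt 2 20)))     ;=> #t: narinfo served right away
(bypass-cache?* (* 100 (expt 2 20)))   ;=> #f: 404 until the nar is baked

;; Passing '--cache-bypass-threshold=0', as the updated tests do, disables
;; the bypass entirely:
(parameterize ((cache-bypass-threshold 0))
  (bypass-cache?* 1))                  ;=> #f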
diff --git a/doc/guix.texi b/doc/guix.texi
index ba7bb9612e..22bddf10e3 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -12086,13 +12086,20 @@ in advance, so @command{guix publish} does not add a
 prevents clients from knowing the amount of data being downloaded.
 
 Conversely, when @option{--cache} is used, the first request for a store
-item (@i{via} a @code{.narinfo} URL) returns 404 and triggers a
+item (@i{via} a @code{.narinfo} URL) triggers a
 background process to @dfn{bake} the archive---computing its
 @code{.narinfo} and compressing the archive, if needed.  Once the
 archive is cached in @var{directory}, subsequent requests succeed and
 are served directly from the cache, which guarantees that clients get
 the best possible bandwidth.
 
+That first @code{.narinfo} request nonetheless returns 200, provided the
+requested store item is ``small enough'', below the cache bypass
+threshold---see @option{--cache-bypass-threshold} below.  That way,
+clients do not have to wait until the archive is baked.  For larger
+store items, the first @code{.narinfo} request returns 404, meaning that
+clients have to wait until the archive is baked.
+
 The ``baking'' process is performed by worker threads.  By default, one
 thread per CPU core is created, but this can be customized.  See
 @option{--workers} below.
@@ -12118,6 +12125,21 @@ Additionally, when @option{--cache} is used, cached entries that have
 not been accessed for @var{ttl} and that no longer have a corresponding
 item in the store, may be deleted.
 
+@item --cache-bypass-threshold=@var{size}
+When used in conjunction with @option{--cache}, store items smaller than
+@var{size} are immediately available, even when they are not yet in
+cache.  @var{size} is a size in bytes, or it can be prefixed by @code{M}
+for megabytes and so on.  The default is @code{10M}.
+
+``Cache bypass'' allows you to reduce the publication delay for clients
+at the expense of possibly additional I/O and CPU use on the server
+side: depending on the client access patterns, those store items can end
+up being baked several times until a copy is available in cache.
+
+Increasing the threshold may be useful for sites that have few users, or
+to guarantee that users get substitutes even for store items that are
+not popular.
+
 @item --nar-path=@var{path}
 Use @var{path} as the prefix for the URLs of ``nar'' files
 (@pxref{Invoking guix archive, normalized archives}).
diff --git a/guix/scripts/publish.scm b/guix/scripts/publish.scm
index 1741b93309..9706b52844 100644
--- a/guix/scripts/publish.scm
+++ b/guix/scripts/publish.scm
@@ -83,6 +83,9 @@ Publish ~a over HTTP.\n") %store-directory)
   (display (G_ "
   -c, --cache=DIRECTORY  cache published items to DIRECTORY"))
   (display (G_ "
+      --cache-bypass-threshold=SIZE
+                         serve store items below SIZE even when not cached"))
+  (display (G_ "
       --workers=N        use N workers to bake items"))
   (display (G_ "
       --ttl=TTL          announce narinfos can be cached for TTL seconds"))
@@ -135,6 +138,12 @@ if ITEM is already compressed."
       (list %no-compression)
       requested))
 
+(define (low-compression c)
+  "Return <compression> of the same type as C, but optimized for low CPU
+usage."
+  (compression (compression-type c)
+               (min (compression-level c) 2)))
+
 (define %options
   (list (option '(#\h "help") #f #f
                 (lambda _
@@ -185,6 +194,10 @@ if ITEM is already compressed."
         (option '(#\c "cache") #t #f
                 (lambda (opt name arg result)
                   (alist-cons 'cache arg result)))
+        (option '("cache-bypass-threshold") #t #f
+                (lambda (opt name arg result)
+                  (alist-cons 'cache-bypass-threshold (size->number arg)
+                              result)))
         (option '("workers") #t #f
                 (lambda (opt name arg result)
                   (alist-cons 'workers (string->number* arg)
@@ -435,7 +448,7 @@ items.  Failing that, we could eventually have to recompute them and return
      (expiration-time file))))))
 
 (define (hash-part->path* store hash cache)
-  "Like 'hash-part->path' but cached results under CACHE.  This ensures we can
+  "Like 'hash-part->path' but cache results under CACHE.  This ensures we can
 still map HASH to the corresponding store file name, even if said store item
 vanished from the store in the meantime."
   (let ((cached (hash-part-mapping-cache-file cache hash)))
@@ -455,6 +468,18 @@ vanished from the store in the meantime."
                result))
             (apply throw args))))))
 
+(define cache-bypass-threshold
+  ;; Maximum size of a store item that may be served by the '/cached' handlers
+  ;; below even when not in cache.
+  (make-parameter (* 10 (expt 2 20))))
+
+(define (bypass-cache? store item)
+  "Return true if we allow ITEM to be downloaded before it is cached.  ITEM is
+interpreted as the basename of a store item."
+  (guard (c ((store-error? c) #f))
+    (< (path-info-nar-size (query-path-info store item))
+       (cache-bypass-threshold))))
+
 (define* (render-narinfo/cached store request hash
                                 #:key ttl (compressions (list %no-compression))
                                 (nar-path "nar")
@@ -514,9 +539,20 @@ requested using POOL."
                                        (nar-expiration-time ttl)
                                        #:delete-entry delete-entry
                                        #:cleanup-period ttl))))
-         (not-found request
-                    #:phrase "We're baking it"
-                    #:ttl 300))            ;should be available within 5m
+
+         ;; If ITEM passes 'bypass-cache?', render a temporary narinfo right
+         ;; away, with a short TTL.  The narinfo is temporary because it
+         ;; lacks 'FileSize', for instance, which the cached narinfo will
+         ;; have.  Chances are that the nar will be baked by the time the
+         ;; client asks for it.
+         (if (bypass-cache? store item)
+             (render-narinfo store request hash
+                             #:ttl 300          ;temporary
+                             #:nar-path nar-path
+                             #:compressions compressions)
+             (not-found request
+                        #:phrase "We're baking it"
+                        #:ttl 300)))           ;should be available within 5m
        (else
         (not-found request #:phrase "")))))
 
@@ -628,19 +664,31 @@ return it; otherwise, return 404.  When TTL is true, use it as the
 'Cache-Control' expiration time."
   (let ((cached (nar-cache-file cache store-item
                                 #:compression compression)))
-    (if (file-exists? cached)
-        (values `((content-type . (application/octet-stream
-                                   (charset . "ISO-8859-1")))
-                  ,@(if ttl
-                        `((cache-control (max-age . ,ttl)))
-                        '())
-
-                  ;; XXX: We're not returning the actual contents, deferring
-                  ;; instead to 'http-write'.  This is a hack to work around
-                  ;; <http://bugs.gnu.org/21093>.
-                  (x-raw-file . ,cached))
-                #f)
-        (not-found request))))
+    (cond ((file-exists? cached)
+           (values `((content-type . (application/octet-stream
+                                      (charset . "ISO-8859-1")))
+                     ,@(if ttl
+                           `((cache-control (max-age . ,ttl)))
+                           '())
+
+                     ;; XXX: We're not returning the actual contents, deferring
+                     ;; instead to 'http-write'.  This is a hack to work around
+                     ;; <http://bugs.gnu.org/21093>.
+                     (x-raw-file . ,cached))
+                   #f))
+          ((let* ((hash (and=> (string-index store-item #\-)
+                               (cut string-take store-item <>)))
+                  (item (and hash
+                             (guard (c ((store-error? c) #f))
+                               (hash-part->path store hash)))))
+             (and item (bypass-cache? store item)))
+           ;; Render STORE-ITEM live.  We reach this because STORE-ITEM is
+           ;; being baked but clients are already asking for it.  Thus, we're
+           ;; duplicating work, but doing so allows us to reduce delays.
+           (render-nar store request store-item
+                       #:compression (low-compression compression)))
+          (else
+           (not-found request)))))
 
 (define (render-content-addressed-file store request
                                        name algo hash)
@@ -1077,7 +1125,10 @@ methods, return the applicable compression."
 consider using the '--user' option!~%")))
 
       (parameterize ((%public-key public-key)
-                     (%private-key private-key))
+                     (%private-key private-key)
+                     (cache-bypass-threshold
+                      (or (assoc-ref opts 'cache-bypass-threshold)
+                          (cache-bypass-threshold))))
         (info (G_ "publishing ~a on ~a, port ~d~%")
               %store-directory
               (inet-ntop (sockaddr:fam address) (sockaddr:addr address))
diff --git a/tests/publish.scm b/tests/publish.scm
index 13f667aa7e..84aa6e5d73 100644
--- a/tests/publish.scm
+++ b/tests/publish.scm
@@ -413,7 +413,8 @@ References: ~%"
                     (call-with-new-thread
                      (lambda ()
                        (guix-publish "--port=6797" "-C2"
-                                     (string-append "--cache=" cache)))))))
+                                     (string-append "--cache=" cache)
+                                     "--cache-bypass-threshold=0")))))))
       (wait-until-ready 6797)
       (let* ((base "http://localhost:6797/")
              (part (store-path-hash-part %item))
@@ -462,7 +463,8 @@ References: ~%"
                    (call-with-new-thread
                     (lambda ()
                       (guix-publish "--port=6794" "-Cgzip:2" "-Clzip:2"
-                                    (string-append "--cache=" cache)))))))
+                                    (string-append "--cache=" cache)
+                                    "--cache-bypass-threshold=0")))))))
       (wait-until-ready 6794)
       (let* ((base "http://localhost:6794/")
              (part (store-path-hash-part %item))
@@ -517,7 +519,8 @@ References: ~%"
                    (call-with-new-thread
                     (lambda ()
                       (guix-publish "--port=6796" "-C2" "--ttl=42h"
-                                    (string-append "--cache=" cache)))))))
+                                    (string-append "--cache=" cache)
+                                    "--cache-bypass-threshold=0")))))))
       (wait-until-ready 6796)
       (let* ((base "http://localhost:6796/")
              (part (store-path-hash-part item))
@@ -581,12 +584,44 @@ References: ~%"
                                      (basename item) ".narinfo"))
              (response (http-get url)))
-        (and (= 404 (response-code response))
+        (and (= 200 (response-code response)) ;we're below the threshold
              (wait-for-file cached)
              (begin
                (delete-paths %store (list item))
                (response-code (pk 'response (http-get url))))))))))
 
+(test-equal "with cache, cache bypass"
+  200
+  (call-with-temporary-directory
+   (lambda (cache)
+     (let ((thread (with-separate-output-ports
+                    (call-with-new-thread
+                     (lambda ()
+                       (guix-publish "--port=6788" "-C" "gzip"
+                                     (string-append "--cache=" cache)))))))
+       (wait-until-ready 6788)
+
+       (let* ((base "http://localhost:6788/")
+              (item (add-text-to-store %store "random" (random-text)))
+              (part (store-path-hash-part item))
+              (narinfo (string-append base part ".narinfo"))
+              (nar (string-append base "nar/gzip/" (basename item)))
+              (cached (string-append cache "/gzip/" (basename item)
+                                     ".narinfo")))
+         ;; We're below the default cache bypass threshold, so NAR and NARINFO
+         ;; should immediately return 200.  The NARINFO request should trigger
+         ;; caching, and the next request to NAR should return 200 as well.
+         (and (let ((response (pk 'r1 (http-get nar))))
+                (and (= 200 (response-code response))
+                     (not (response-content-length response)))) ;not known
+              (= 200 (response-code (http-get narinfo)))
+              (begin
+                (wait-for-file cached)
+                (let ((response (pk 'r2 (http-get nar))))
+                  (and (> (response-content-length response)
+                          (stat:size (stat item)))
+                       (response-code response))))))))))
+
 (test-equal "/log/NAME"
   `(200 #t application/x-bzip2)
   (let ((drv (run-with-store %store
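
To observe the new behavior end to end, one can probe a running 'guix publish' the way the new test does, using Guile's (web client) module. The sketch below is not part of the patch and makes a few assumptions: a server started with, say, 'guix publish --port=8080 --cache=/var/cache/guix/publish', and a store item small enough to fall under the default 10M threshold; the port and the hash part are placeholders to substitute with real values.

;; Rough sketch: ask a running 'guix publish' for a .narinfo, mirroring the
;; checks in the new "with cache, cache bypass" test.  The base URL and
;; HASH-PART below are placeholders; replace them with values from your store.
(use-modules (web client) (web response) (srfi srfi-11))

(define base "http://localhost:8080/")                 ;assumed --port=8080
(define hash-part "0afcqm1bkfk9xvjmnrsjlvg87bq9fyn1")  ;placeholder hash part
(define narinfo-uri (string-append base hash-part ".narinfo"))

;; With cache bypass, the very first .narinfo request for a small item should
;; return 200 (a temporary narinfo); before this change it returned 404 until
;; the nar had been baked.
(let-values (((response body) (http-get narinfo-uri)))
  (format #t "~a -> ~a~%" narinfo-uri (response-code response)))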