diff options
author | Christopher Baines <mail@cbaines.net> | 2025-01-29 09:53:28 +0000 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2025-01-29 09:53:28 +0000 |
commit | bd7cde6ed979d0cd38be090b893715f96258b757 (patch) | |
tree | f1977818b24d27d20bd3fa09180b803c6191d4ee | |
parent | 9e4bae5a41e0f557ddb4a3c3c6dec3827d8b1195 (diff) | |
download | nar-herder-bd7cde6ed979d0cd38be090b893715f96258b757.tar nar-herder-bd7cde6ed979d0cd38be090b893715f96258b757.tar.gz |
WIPwip2
-rw-r--r-- | README.org | 24 | ||||
-rw-r--r-- | nar-herder/database.scm | 188 | ||||
-rw-r--r-- | nar-herder/removal.scm | 84 | ||||
-rw-r--r-- | nar-herder/server.scm | 33 | ||||
-rw-r--r-- | nar-herder/storage.scm | 1 | ||||
-rw-r--r-- | nginx/conf/nginx.conf | 35 | ||||
-rw-r--r-- | scripts/nar-herder.in | 123 |
7 files changed, 442 insertions, 46 deletions
@@ -101,3 +101,27 @@ prevent this. #+BEGIN_SRC sh nar-herder run-server --mirror=https://foo.example.com --storage=/var/lib/nars #+END_SRC + +* TODO + +** TODO Review the information in the database dump + +I think it contains entries for cached nars and other tables that +don't want to be synced to mirrors + +** TODO Allow gracefully removing nars + +By dropping the TTL until they're removed. + +*** TODO Bordeaux nar cleanup + +**** TODO Query for narinfos added_at more than 6 months ago, tagged with the unknown-if-for-master tag and not scheduled for removal. +**** TODO Check if data.guix.gnu.org knows about them +**** TODO If it does, them remove the tag +**** TODO If it doesn't, then schedule the narinfo for removal (only if it has no dependents) + +** TODO Take care to delete temporary source files + +** TODO Either cache not stored or stored nar files + +To speed up finding or not finding stored nars diff --git a/nar-herder/database.scm b/nar-herder/database.scm index 239a7e7..69df160 100644 --- a/nar-herder/database.scm +++ b/nar-herder/database.scm @@ -48,6 +48,7 @@ database-select-narinfo database-select-narinfo-by-hash database-select-narinfo-contents-by-hash + database-fold-narinfos database-count-recent-changes database-select-recent-changes @@ -70,7 +71,10 @@ database-fold-cached-narinfo-files database-remove-cached-narinfo-file + database-insert-scheduled-narinfo-removal database-select-scheduled-narinfo-removal + database-select-oldest-scheduled-narinfo-removal + database-select-scheduled-cached-narinfo-removal database-delete-scheduled-cached-narinfo-removal database-select-oldest-scheduled-cached-narinfo-removal @@ -151,7 +155,7 @@ CREATE TABLE cached_narinfo_files ( CREATE TABLE scheduled_narinfo_removal ( narinfo_id INTEGER PRIMARY KEY ASC REFERENCES narinfos (id), - removal_datetime TEXT NOT NULL + removal_request_inserted_at_datetime TEXT NOT NULL ); CREATE TABLE scheduled_cached_narinfo_removal ( @@ -221,7 +225,7 @@ CREATE TABLE cached_narinfo_files ( " CREATE TABLE scheduled_narinfo_removal ( narinfo_id INTEGER PRIMARY KEY ASC REFERENCES narinfos (id), - removal_datetime TEXT NOT NULL + removal_request_inserted_at_datetime TEXT NOT NULL );")) (unless (table-exists? db "scheduled_cached_narinfo_removal") @@ -442,6 +446,11 @@ CREATE UNIQUE INDEX IF NOT EXISTS PRAGMA analysis_limit=1000; PRAGMA optimize;"))) +(define (sqlite-step-and-reset statement) + (let ((val (sqlite-step statement))) + (sqlite-reset statement) + val)) + (define (database-optimize database) (retry-on-error (lambda () @@ -1044,6 +1053,89 @@ SELECT id, contents FROM narinfos WHERE substr(store_path, 12, 32) = :hash" (_ (values #f #f))))))))) +(define* (database-fold-narinfos database #:key + added-after-lt added-after-gt + (tags '()) + (not-tags '()) + (scheduled-for-removal 'unset) + limit) + (define (tag->expression db tag not?) + (let ((statement + (sqlite-prepare + db + " +SELECT id FROM tags WHERE key = :key AND value = :value" + #:cache? #t)) + (key-statement + (sqlite-prepare + db + " +SELECT id FROM tags WHERE key = :key" + #:cache? #t))) + (match tag + ((key . value) + (sqlite-bind-arguments statement + #:key key + #:value value) + + (match (sqlite-step-and-reset statement) + (#(id) + (string-append + (if not? "NOT " "") + "EXISTS (SELECT 1 FROM build_tags WHERE build_id = builds.id AND tag_id = " + (number->string id) + ")")) + (#f #f))) + (key + (sqlite-bind-arguments key-statement + #:key key) + + (let* ((tag-ids (sqlite-map + (match-lambda + (#(id) id)) + key-statementl)) + (result + (string-append + "(" + (string-join + (map + (lambda (id) + (string-append + (if not? "NOT " "") + "EXISTS (SELECT 1 FROM build_tags " + "WHERE build_id = builds.id AND tag_id = " + (number->string id) + ")")) + tag-ids) + (if not? " AND " " OR ")) + ")"))) + (sqlite-reset key-statement) + + result))))) + + (call-with-worker-thread + (database-reader-thread-channel database) + (lambda (db) + (let ((statement + (sqlite-prepare + db + " +SELECT id, store_path, nar_hash, nar_size, deriver, system, added_at +FROM narinfos +WHERE " + #:cache? #t))) + (sqlite-bind-arguments + statement + #:hash hash) + + (match (let ((result (sqlite-step statement))) + (sqlite-reset statement) + result) + (#(id store_path nar_hash nar_size deriver system added_at) + (values contents id)) + (_ + (values #f #f))))))) + (define (database-count-recent-changes database) (call-with-worker-thread (database-reader-thread-set database) @@ -1516,7 +1608,7 @@ WHERE narinfo_id = :narinfo_id (sqlite-prepare db " -SELECT removal_datetime +SELECT removal_request_inserted_at_datetime FROM scheduled_narinfo_removal WHERE narinfo_id = :narinfo_id" #:cache? #t))) @@ -1535,6 +1627,93 @@ WHERE narinfo_id = :narinfo_id" (sqlite-reset statement) result))))))) +(define (database-delete-scheduled-narinfo-removal database narinfo-id) + (call-with-worker-thread + (database-writer-thread-channel database) + (lambda (db) + (let ((statement + (sqlite-prepare + db + " +DELETE FROM scheduled_narinfo_removal +WHERE narinfo_id = :narinfo_id +RETURNING 1" + #:cache? #t))) + (sqlite-bind-arguments + statement + #:narinfo_id narinfo-id) + + (let ((result (->bool (sqlite-step statement)))) + (sqlite-reset statement) + + result))))) + +(define (database-insert-scheduled-narinfo-removal database + narinfo-id) + (call-with-worker-thread + (database-writer-thread-channel database) + (lambda (db) + (let ((statement + (sqlite-prepare + db + " +INSERT INTO scheduled_narinfo_removal ( + narinfo_id, removal_request_inserted_at_datetime +) VALUES ( + :narinfo_id, :removal_request_inserted_at_datetime +)" + #:cache? #t))) + + (sqlite-bind-arguments + statement + #:narinfo_id narinfo-id + #:removal_request_inserted_at_datetime + (date->string + (current-date) + "~Y-~m-~d ~H:~M:~S")) + + (sqlite-step statement) + (sqlite-reset statement) + + #t)))) + +(define (database-select-oldest-scheduled-narinfo-removal database) + (call-with-worker-thread + (database-reader-thread-channel database) + (lambda (db) + (let ((statement + (sqlite-prepare + db + " +SELECT cached_narinfo_files.id, + cached_narinfo_files.narinfo_id, + cached_narinfo_files.size, + cached_narinfo_files.compression, + narinfos.store_path, + scheduled_cached_narinfo_removal.removal_datetime +FROM scheduled_narinfo_removal +INNER JOIN narinfos + ON cached_narinfo_files.narinfo_id = narinfos.id +ORDER BY scheduled_narinfo_removal.removal_datetime DESC +LIMIT 1" + #:cache? #t))) + + (let ((result + (match (sqlite-step statement) + (#(id narinfo_id size compression store_path datetime) + `((id . ,id) + (narinfo-id . ,narinfo_id) + (size . ,size) + (compression . ,(string->symbol compression)) + (store-path . ,store_path) + (scheduled-removal-time . ,(date->time-utc + (string->date + datetime + "~Y-~m-~d ~H:~M:~S"))))) + (#f #f)))) + (sqlite-reset statement) + result))))) + (define (database-select-scheduled-cached-narinfo-removal database cached-narinfo-file-id) (call-with-time-tracking @@ -1584,7 +1763,8 @@ RETURNING 1" statement #:cached_narinfo_file_id cached-narinfo-file-id) - (let ((result (->bool (sqlite-step statement)))) + (let ((result (->bool (peek "STEP" (sqlite-step statement))))) + (peek "2" (sqlite-step statement)) (sqlite-reset statement) result))))) diff --git a/nar-herder/removal.scm b/nar-herder/removal.scm new file mode 100644 index 0000000..ebf1b5a --- /dev/null +++ b/nar-herder/removal.scm @@ -0,0 +1,84 @@ +;;; Nar Herder +;;; +;;; Copyright © 2024 Christopher Baines <mail@cbaines.net> +;;; +;;; This program is free software: you can redistribute it and/or +;;; modify it under the terms of the GNU Affero General Public License +;;; as published by the Free Software Foundation, either version 3 of +;;; the License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Affero General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public +;;; License along with this program. If not, see +;;; <http://www.gnu.org/licenses/>. + +(define-module (nar-herder removal) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-19) + #:use-module (fibers) + #:use-module (nar-herder utils) + #:use-module (nar-herder database) + #:export (start-narinfo-removal-fiber + start-narinfo-removal-management-fiber)) + +(define (start-narinfo-removal-fiber database narinfo-removal-channel) + (spawn-fiber + (lambda () + (while #t + (let ((details (database-select-oldest-scheduled-narinfo-removal + database))) + (if scheduled time in the future + (sleep) + (narinfo-removal-channel-remove-narinfo + narinfo-removal-channel + details))))))) + +(define (narinfo-removal-channel-remove-narinfo narinfo-removal-channel + narinfo-id) + (put-message narinfo-removal-channel + (cons 'remove narinfo-id))) + +(define (remove-narinfo ) + (when storage + (remove-nar-files-by-hash + database + (assq-ref opts 'storage) + metrics-registry + (store-path-hash-part store-path) + #:error-unless-files-to-remove? #f)) + + (for-each + (lambda (cached-narinfo-details) + ;; It might not have been scheduled for + ;; removal, but remove any schedule that + ;; exists + (database-delete-scheduled-cached-narinfo-removal + database + (assq-ref cached-narinfo-details 'id)) + + ;; Remove all the database entries first, as + ;; that'll stop these files appearing in narinfos + (database-remove-cached-narinfo-file + database + (assq-ref narinfo-details 'id) + (symbol->string + (assq-ref cached-narinfo-details 'compression)))) + cached-narinfo-files) + + (database-remove-narinfo database store-path)) + +(define (start-narinfo-removal-management-fiber) + (define channel (make-channel)) + + (spawn-fiber + (lambda () + (while #t + (match (get-message channel) + (('remove narinfo-id) + (remove-narinfo)))))) + + channel) diff --git a/nar-herder/server.scm b/nar-herder/server.scm index b904675..bef56fa 100644 --- a/nar-herder/server.scm +++ b/nar-herder/server.scm @@ -178,6 +178,7 @@ (define* (make-request-handler database storage-root #:key base-ttl base-cached-compressions-ttl + narinfo-ttl negative-ttl logger metrics-registry maybe-trigger-creation-of-cached-nars @@ -277,14 +278,25 @@ ,(and=> (database-select-scheduled-narinfo-removal database narinfo-id) - (lambda (scheduled-removal-time) - (list - (max - (- (time-second - (time-difference scheduled-removal-time - (current-time))) - 60) - 0)))) + (lambda (removal-requested-at-datetime) + (let ((scheduled-removal-time + (add-duration + removal-requested-at-datetime + (make-time + time-duration + 0 + ;; Use the narinfo-ttl as + ;; this represents the TTL + ;; that should be used for + ;; removals + narinfo-ttl)))) + (list + (max + (- (time-second + (time-difference scheduled-removal-time + (current-time))) + 60) + 0))))) ,@(if (null? cached-narinfo-files) '() @@ -376,6 +388,9 @@ (string->symbol compression) #f))) + ;; TODO Double check the size of the served files, and error + ;; if it doesn't match + (if narinfo-file-for-compression (let ((loop? (any @@ -614,6 +629,7 @@ body #:download-size (response-content-length response)))) + ;; TODO Write to a temporary file (call-with-output-file (assq-ref opts 'database) (lambda (output-port) (dump-port port output-port))) @@ -932,6 +948,7 @@ canonical-storage #:base-ttl (or (assq-ref opts 'new-narinfo-ttl) (assq-ref opts 'narinfo-ttl)) + #:narinfo-ttl (assq-ref opts 'narinfo-ttl) #:base-cached-compressions-ttl (or (assq-ref opts 'new-cached-compressions-narinfo-ttl) (assq-ref opts 'cached-compressions-narinfo-ttl)) diff --git a/nar-herder/storage.scm b/nar-herder/storage.scm index 0e7186d..e13c056 100644 --- a/nar-herder/storage.scm +++ b/nar-herder/storage.scm @@ -639,6 +639,7 @@ When VERIFY-CERTIFICATE? is true, verify HTTPS server certificates." #:timeout 30)) #:unwind? #t) + ;; TODO Check the size of the file (rename-file tmp-file-name destination-file-name) diff --git a/nginx/conf/nginx.conf b/nginx/conf/nginx.conf index 9a4d6e6..6a3d443 100644 --- a/nginx/conf/nginx.conf +++ b/nginx/conf/nginx.conf @@ -1,5 +1,5 @@ daemon off; -error_log /dev/stdout info; +error_log /dev/stdout debug; events { } @@ -11,8 +11,13 @@ http { server 127.0.0.1:8080; } + upstream nar-storage { + server 127.0.0.1:8082; + } + server { listen 8081; + set $via "1.1 nginx1"; location ~ \.narinfo$ { proxy_pass http://nar-herder; @@ -22,6 +27,34 @@ http { proxy_pass http://nar-herder; } + location @nar-storage { + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_ignore_client_abort on; + if ($http_via) { + set $via "$http_via, $via"; + } + proxy_set_header Via $via; + proxy_set_header x-bar $http_via; + rewrite /internal/(.*) /$1 break; + proxy_pass http://nar-storage; + } + + location ~ ^/internal/nar/(.*)$ { + internal; + root /var/lib/nars; + + try_files /nar/$1 @nar-storage; + + error_page 404 /404; + + client_body_buffer_size 256k; + + # Nars are already compressed. + gzip off; + + } + location = /latest-database-dump { proxy_pass http://nar-herder; } diff --git a/scripts/nar-herder.in b/scripts/nar-herder.in index 812ec98..921cc70 100644 --- a/scripts/nar-herder.in +++ b/scripts/nar-herder.in @@ -125,6 +125,14 @@ (define %import-options-defaults '()) +(define %removal-options + (list (option '("immediate") #f #f + (lambda (opt name _ result) + (alist-cons 'immediate? #t result))))) + +(define %removal-options-defaults + '()) + (define %server-options (list (option '("port") #t #f (lambda (opt name arg result) @@ -495,8 +503,10 @@ (when (= 1 len) (simple-format (current-error-port) "imported narinfo\n"))))) (("remove" rest ...) - (let* ((opts (parse-options %base-options - %base-option-defaults + (let* ((opts (parse-options (append %base-options + %removal-options) + (append %base-option-defaults + %removal-options-defaults) rest)) (metrics-registry (make-metrics-registry #:namespace @@ -534,40 +544,84 @@ database (assq-ref narinfo-details 'id)))) - (log-msg 'INFO "removing " store-path) - - (if (assq-ref opts 'storage) + (if (assq-ref opts 'immediate?) (begin - (remove-nar-files-by-hash - database - (assq-ref opts 'storage) - metrics-registry - (store-path-hash-part store-path) - #:error-unless-files-to-remove? #f)) - (log-msg - 'WARN "no --storage set, so just removing from the database")) + (log-msg 'INFO "removing " store-path) + + (if (assq-ref opts 'storage) + (begin + (remove-nar-files-by-hash + database + (assq-ref opts 'storage) + metrics-registry + (store-path-hash-part store-path) + #:error-unless-files-to-remove? #f)) + (log-msg + 'WARN "no --storage set, so just removing from the database")) + + (for-each + (lambda (cached-narinfo-details) + ;; It might not have been scheduled for + ;; removal, but remove any schedule that + ;; exists + (database-delete-scheduled-cached-narinfo-removal + database + (assq-ref cached-narinfo-details 'id)) - (for-each - (lambda (cached-narinfo-details) - ;; It might not have been scheduled for - ;; removal, but remove any schedule that - ;; exists - (database-delete-scheduled-cached-narinfo-removal - database - (assq-ref cached-narinfo-details 'id)) + ;; Remove all the database entries first, as + ;; that'll stop these files appearing in narinfos + (database-remove-cached-narinfo-file + database + (assq-ref narinfo-details 'id) + (symbol->string + (assq-ref cached-narinfo-details 'compression)))) + cached-narinfo-files) - ;; Remove all the database entries first, as - ;; that'll stop these files appearing in narinfos - (database-remove-cached-narinfo-file - database - (assq-ref narinfo-details 'id) - (symbol->string - (assq-ref cached-narinfo-details 'compression)))) - cached-narinfo-files) + (database-remove-narinfo database store-path)) + (begin + (log-msg 'INFO "scheduling removal for " store-path) - (database-remove-narinfo database store-path)) + (database-insert-scheduled-narinfo-removal + database + (assq-ref narinfo-details 'id))))) (log-msg 'WARN store-path " not found to remove")))) (assq-ref opts 'arguments)))) + (("list" rest ...) + (let* ((opts (parse-options (append %base-options + %list-options) + (append %base-option-defaults + %list-options-defaults) + rest)) + (metrics-registry + (make-metrics-registry #:namespace + "narherder")) + (database (setup-database + (assq-ref opts 'database) + metrics-registry)) + (lgr (make <logger>)) + (port-log (make <port-log> + #:port (current-output-port) + #:formatter + ;; In guile-lib v0.2.8 onwards, the formatter is + ;; called with more arguments + (lambda args ; lvl, time, str + (format #f "~a (~5a): ~a~%" + (strftime "%F %H:%M:%S" (localtime + (second args))) + (first args) + (third args)))))) + + (add-handler! lgr port-log) + (open-log! lgr) + (set-default-logger! lgr) + + ;; added_at more than 6 months ago + ;; tagged with the unknown-if-for-master tag + ;; not scheduled for removal. + ;; limit 1000 + + #f + ) (("check" rest ...) (let* ((opts (parse-options (append %base-options %check-options) @@ -607,9 +661,12 @@ (canonical-storage (and=> (assq-ref opts 'storage) canonicalize-path))) - (check-storage database - canonical-storage - metrics-registry)))) + (database-delete-scheduled-cached-narinfo-removal + database + 1)))) + ;; (check-storage database + ;; canonical-storage + ;; metrics-registry)))) (("run-server" rest ...) (simple-format (current-error-port) "locale is ~A\n" (check-locale!)) |