aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2025-01-29 09:53:28 +0000
committerChristopher Baines <mail@cbaines.net>2025-01-29 09:53:28 +0000
commitbd7cde6ed979d0cd38be090b893715f96258b757 (patch)
treef1977818b24d27d20bd3fa09180b803c6191d4ee
parent9e4bae5a41e0f557ddb4a3c3c6dec3827d8b1195 (diff)
downloadnar-herder-bd7cde6ed979d0cd38be090b893715f96258b757.tar
nar-herder-bd7cde6ed979d0cd38be090b893715f96258b757.tar.gz
WIPwip2
-rw-r--r--README.org24
-rw-r--r--nar-herder/database.scm188
-rw-r--r--nar-herder/removal.scm84
-rw-r--r--nar-herder/server.scm33
-rw-r--r--nar-herder/storage.scm1
-rw-r--r--nginx/conf/nginx.conf35
-rw-r--r--scripts/nar-herder.in123
7 files changed, 442 insertions, 46 deletions
diff --git a/README.org b/README.org
index 567c745..82b1ce1 100644
--- a/README.org
+++ b/README.org
@@ -101,3 +101,27 @@ prevent this.
#+BEGIN_SRC sh
nar-herder run-server --mirror=https://foo.example.com --storage=/var/lib/nars
#+END_SRC
+
+* TODO
+
+** TODO Review the information in the database dump
+
+I think it contains entries for cached nars and other tables that
+don't want to be synced to mirrors
+
+** TODO Allow gracefully removing nars
+
+By dropping the TTL until they're removed.
+
+*** TODO Bordeaux nar cleanup
+
+**** TODO Query for narinfos added_at more than 6 months ago, tagged with the unknown-if-for-master tag and not scheduled for removal.
+**** TODO Check if data.guix.gnu.org knows about them
+**** TODO If it does, them remove the tag
+**** TODO If it doesn't, then schedule the narinfo for removal (only if it has no dependents)
+
+** TODO Take care to delete temporary source files
+
+** TODO Either cache not stored or stored nar files
+
+To speed up finding or not finding stored nars
diff --git a/nar-herder/database.scm b/nar-herder/database.scm
index 239a7e7..69df160 100644
--- a/nar-herder/database.scm
+++ b/nar-herder/database.scm
@@ -48,6 +48,7 @@
database-select-narinfo
database-select-narinfo-by-hash
database-select-narinfo-contents-by-hash
+ database-fold-narinfos
database-count-recent-changes
database-select-recent-changes
@@ -70,7 +71,10 @@
database-fold-cached-narinfo-files
database-remove-cached-narinfo-file
+ database-insert-scheduled-narinfo-removal
database-select-scheduled-narinfo-removal
+ database-select-oldest-scheduled-narinfo-removal
+
database-select-scheduled-cached-narinfo-removal
database-delete-scheduled-cached-narinfo-removal
database-select-oldest-scheduled-cached-narinfo-removal
@@ -151,7 +155,7 @@ CREATE TABLE cached_narinfo_files (
CREATE TABLE scheduled_narinfo_removal (
narinfo_id INTEGER PRIMARY KEY ASC REFERENCES narinfos (id),
- removal_datetime TEXT NOT NULL
+ removal_request_inserted_at_datetime TEXT NOT NULL
);
CREATE TABLE scheduled_cached_narinfo_removal (
@@ -221,7 +225,7 @@ CREATE TABLE cached_narinfo_files (
"
CREATE TABLE scheduled_narinfo_removal (
narinfo_id INTEGER PRIMARY KEY ASC REFERENCES narinfos (id),
- removal_datetime TEXT NOT NULL
+ removal_request_inserted_at_datetime TEXT NOT NULL
);"))
(unless (table-exists? db "scheduled_cached_narinfo_removal")
@@ -442,6 +446,11 @@ CREATE UNIQUE INDEX IF NOT EXISTS
PRAGMA analysis_limit=1000;
PRAGMA optimize;")))
+(define (sqlite-step-and-reset statement)
+ (let ((val (sqlite-step statement)))
+ (sqlite-reset statement)
+ val))
+
(define (database-optimize database)
(retry-on-error
(lambda ()
@@ -1044,6 +1053,89 @@ SELECT id, contents FROM narinfos WHERE substr(store_path, 12, 32) = :hash"
(_
(values #f #f)))))))))
+(define* (database-fold-narinfos database #:key
+ added-after-lt added-after-gt
+ (tags '())
+ (not-tags '())
+ (scheduled-for-removal 'unset)
+ limit)
+ (define (tag->expression db tag not?)
+ (let ((statement
+ (sqlite-prepare
+ db
+ "
+SELECT id FROM tags WHERE key = :key AND value = :value"
+ #:cache? #t))
+ (key-statement
+ (sqlite-prepare
+ db
+ "
+SELECT id FROM tags WHERE key = :key"
+ #:cache? #t)))
+ (match tag
+ ((key . value)
+ (sqlite-bind-arguments statement
+ #:key key
+ #:value value)
+
+ (match (sqlite-step-and-reset statement)
+ (#(id)
+ (string-append
+ (if not? "NOT " "")
+ "EXISTS (SELECT 1 FROM build_tags WHERE build_id = builds.id AND tag_id = "
+ (number->string id)
+ ")"))
+ (#f #f)))
+ (key
+ (sqlite-bind-arguments key-statement
+ #:key key)
+
+ (let* ((tag-ids (sqlite-map
+ (match-lambda
+ (#(id) id))
+ key-statementl))
+ (result
+ (string-append
+ "("
+ (string-join
+ (map
+ (lambda (id)
+ (string-append
+ (if not? "NOT " "")
+ "EXISTS (SELECT 1 FROM build_tags "
+ "WHERE build_id = builds.id AND tag_id = "
+ (number->string id)
+ ")"))
+ tag-ids)
+ (if not? " AND " " OR "))
+ ")")))
+ (sqlite-reset key-statement)
+
+ result)))))
+
+ (call-with-worker-thread
+ (database-reader-thread-channel database)
+ (lambda (db)
+ (let ((statement
+ (sqlite-prepare
+ db
+ "
+SELECT id, store_path, nar_hash, nar_size, deriver, system, added_at
+FROM narinfos
+WHERE "
+ #:cache? #t)))
+ (sqlite-bind-arguments
+ statement
+ #:hash hash)
+
+ (match (let ((result (sqlite-step statement)))
+ (sqlite-reset statement)
+ result)
+ (#(id store_path nar_hash nar_size deriver system added_at)
+ (values contents id))
+ (_
+ (values #f #f)))))))
+
(define (database-count-recent-changes database)
(call-with-worker-thread
(database-reader-thread-set database)
@@ -1516,7 +1608,7 @@ WHERE narinfo_id = :narinfo_id
(sqlite-prepare
db
"
-SELECT removal_datetime
+SELECT removal_request_inserted_at_datetime
FROM scheduled_narinfo_removal
WHERE narinfo_id = :narinfo_id"
#:cache? #t)))
@@ -1535,6 +1627,93 @@ WHERE narinfo_id = :narinfo_id"
(sqlite-reset statement)
result)))))))
+(define (database-delete-scheduled-narinfo-removal database narinfo-id)
+ (call-with-worker-thread
+ (database-writer-thread-channel database)
+ (lambda (db)
+ (let ((statement
+ (sqlite-prepare
+ db
+ "
+DELETE FROM scheduled_narinfo_removal
+WHERE narinfo_id = :narinfo_id
+RETURNING 1"
+ #:cache? #t)))
+ (sqlite-bind-arguments
+ statement
+ #:narinfo_id narinfo-id)
+
+ (let ((result (->bool (sqlite-step statement))))
+ (sqlite-reset statement)
+
+ result)))))
+
+(define (database-insert-scheduled-narinfo-removal database
+ narinfo-id)
+ (call-with-worker-thread
+ (database-writer-thread-channel database)
+ (lambda (db)
+ (let ((statement
+ (sqlite-prepare
+ db
+ "
+INSERT INTO scheduled_narinfo_removal (
+ narinfo_id, removal_request_inserted_at_datetime
+) VALUES (
+ :narinfo_id, :removal_request_inserted_at_datetime
+)"
+ #:cache? #t)))
+
+ (sqlite-bind-arguments
+ statement
+ #:narinfo_id narinfo-id
+ #:removal_request_inserted_at_datetime
+ (date->string
+ (current-date)
+ "~Y-~m-~d ~H:~M:~S"))
+
+ (sqlite-step statement)
+ (sqlite-reset statement)
+
+ #t))))
+
+(define (database-select-oldest-scheduled-narinfo-removal database)
+ (call-with-worker-thread
+ (database-reader-thread-channel database)
+ (lambda (db)
+ (let ((statement
+ (sqlite-prepare
+ db
+ "
+SELECT cached_narinfo_files.id,
+ cached_narinfo_files.narinfo_id,
+ cached_narinfo_files.size,
+ cached_narinfo_files.compression,
+ narinfos.store_path,
+ scheduled_cached_narinfo_removal.removal_datetime
+FROM scheduled_narinfo_removal
+INNER JOIN narinfos
+ ON cached_narinfo_files.narinfo_id = narinfos.id
+ORDER BY scheduled_narinfo_removal.removal_datetime DESC
+LIMIT 1"
+ #:cache? #t)))
+
+ (let ((result
+ (match (sqlite-step statement)
+ (#(id narinfo_id size compression store_path datetime)
+ `((id . ,id)
+ (narinfo-id . ,narinfo_id)
+ (size . ,size)
+ (compression . ,(string->symbol compression))
+ (store-path . ,store_path)
+ (scheduled-removal-time . ,(date->time-utc
+ (string->date
+ datetime
+ "~Y-~m-~d ~H:~M:~S")))))
+ (#f #f))))
+ (sqlite-reset statement)
+ result)))))
+
(define (database-select-scheduled-cached-narinfo-removal database
cached-narinfo-file-id)
(call-with-time-tracking
@@ -1584,7 +1763,8 @@ RETURNING 1"
statement
#:cached_narinfo_file_id cached-narinfo-file-id)
- (let ((result (->bool (sqlite-step statement))))
+ (let ((result (->bool (peek "STEP" (sqlite-step statement)))))
+ (peek "2" (sqlite-step statement))
(sqlite-reset statement)
result)))))
diff --git a/nar-herder/removal.scm b/nar-herder/removal.scm
new file mode 100644
index 0000000..ebf1b5a
--- /dev/null
+++ b/nar-herder/removal.scm
@@ -0,0 +1,84 @@
+;;; Nar Herder
+;;;
+;;; Copyright © 2024 Christopher Baines <mail@cbaines.net>
+;;;
+;;; This program is free software: you can redistribute it and/or
+;;; modify it under the terms of the GNU Affero General Public License
+;;; as published by the Free Software Foundation, either version 3 of
+;;; the License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with this program. If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+(define-module (nar-herder removal)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-19)
+ #:use-module (fibers)
+ #:use-module (nar-herder utils)
+ #:use-module (nar-herder database)
+ #:export (start-narinfo-removal-fiber
+ start-narinfo-removal-management-fiber))
+
+(define (start-narinfo-removal-fiber database narinfo-removal-channel)
+ (spawn-fiber
+ (lambda ()
+ (while #t
+ (let ((details (database-select-oldest-scheduled-narinfo-removal
+ database)))
+ (if scheduled time in the future
+ (sleep)
+ (narinfo-removal-channel-remove-narinfo
+ narinfo-removal-channel
+ details)))))))
+
+(define (narinfo-removal-channel-remove-narinfo narinfo-removal-channel
+ narinfo-id)
+ (put-message narinfo-removal-channel
+ (cons 'remove narinfo-id)))
+
+(define (remove-narinfo )
+ (when storage
+ (remove-nar-files-by-hash
+ database
+ (assq-ref opts 'storage)
+ metrics-registry
+ (store-path-hash-part store-path)
+ #:error-unless-files-to-remove? #f))
+
+ (for-each
+ (lambda (cached-narinfo-details)
+ ;; It might not have been scheduled for
+ ;; removal, but remove any schedule that
+ ;; exists
+ (database-delete-scheduled-cached-narinfo-removal
+ database
+ (assq-ref cached-narinfo-details 'id))
+
+ ;; Remove all the database entries first, as
+ ;; that'll stop these files appearing in narinfos
+ (database-remove-cached-narinfo-file
+ database
+ (assq-ref narinfo-details 'id)
+ (symbol->string
+ (assq-ref cached-narinfo-details 'compression))))
+ cached-narinfo-files)
+
+ (database-remove-narinfo database store-path))
+
+(define (start-narinfo-removal-management-fiber)
+ (define channel (make-channel))
+
+ (spawn-fiber
+ (lambda ()
+ (while #t
+ (match (get-message channel)
+ (('remove narinfo-id)
+ (remove-narinfo))))))
+
+ channel)
diff --git a/nar-herder/server.scm b/nar-herder/server.scm
index b904675..bef56fa 100644
--- a/nar-herder/server.scm
+++ b/nar-herder/server.scm
@@ -178,6 +178,7 @@
(define* (make-request-handler database storage-root
#:key base-ttl base-cached-compressions-ttl
+ narinfo-ttl
negative-ttl logger
metrics-registry
maybe-trigger-creation-of-cached-nars
@@ -277,14 +278,25 @@
,(and=> (database-select-scheduled-narinfo-removal
database
narinfo-id)
- (lambda (scheduled-removal-time)
- (list
- (max
- (- (time-second
- (time-difference scheduled-removal-time
- (current-time)))
- 60)
- 0))))
+ (lambda (removal-requested-at-datetime)
+ (let ((scheduled-removal-time
+ (add-duration
+ removal-requested-at-datetime
+ (make-time
+ time-duration
+ 0
+ ;; Use the narinfo-ttl as
+ ;; this represents the TTL
+ ;; that should be used for
+ ;; removals
+ narinfo-ttl))))
+ (list
+ (max
+ (- (time-second
+ (time-difference scheduled-removal-time
+ (current-time)))
+ 60)
+ 0)))))
,@(if (null? cached-narinfo-files)
'()
@@ -376,6 +388,9 @@
(string->symbol compression)
#f)))
+ ;; TODO Double check the size of the served files, and error
+ ;; if it doesn't match
+
(if narinfo-file-for-compression
(let ((loop?
(any
@@ -614,6 +629,7 @@
body
#:download-size (response-content-length response))))
+ ;; TODO Write to a temporary file
(call-with-output-file (assq-ref opts 'database)
(lambda (output-port)
(dump-port port output-port)))
@@ -932,6 +948,7 @@
canonical-storage
#:base-ttl (or (assq-ref opts 'new-narinfo-ttl)
(assq-ref opts 'narinfo-ttl))
+ #:narinfo-ttl (assq-ref opts 'narinfo-ttl)
#:base-cached-compressions-ttl
(or (assq-ref opts 'new-cached-compressions-narinfo-ttl)
(assq-ref opts 'cached-compressions-narinfo-ttl))
diff --git a/nar-herder/storage.scm b/nar-herder/storage.scm
index 0e7186d..e13c056 100644
--- a/nar-herder/storage.scm
+++ b/nar-herder/storage.scm
@@ -639,6 +639,7 @@ When VERIFY-CERTIFICATE? is true, verify HTTPS server certificates."
#:timeout 30))
#:unwind? #t)
+ ;; TODO Check the size of the file
(rename-file tmp-file-name
destination-file-name)
diff --git a/nginx/conf/nginx.conf b/nginx/conf/nginx.conf
index 9a4d6e6..6a3d443 100644
--- a/nginx/conf/nginx.conf
+++ b/nginx/conf/nginx.conf
@@ -1,5 +1,5 @@
daemon off;
-error_log /dev/stdout info;
+error_log /dev/stdout debug;
events {
}
@@ -11,8 +11,13 @@ http {
server 127.0.0.1:8080;
}
+ upstream nar-storage {
+ server 127.0.0.1:8082;
+ }
+
server {
listen 8081;
+ set $via "1.1 nginx1";
location ~ \.narinfo$ {
proxy_pass http://nar-herder;
@@ -22,6 +27,34 @@ http {
proxy_pass http://nar-herder;
}
+ location @nar-storage {
+ proxy_http_version 1.1;
+ proxy_set_header Connection "";
+ proxy_ignore_client_abort on;
+ if ($http_via) {
+ set $via "$http_via, $via";
+ }
+ proxy_set_header Via $via;
+ proxy_set_header x-bar $http_via;
+ rewrite /internal/(.*) /$1 break;
+ proxy_pass http://nar-storage;
+ }
+
+ location ~ ^/internal/nar/(.*)$ {
+ internal;
+ root /var/lib/nars;
+
+ try_files /nar/$1 @nar-storage;
+
+ error_page 404 /404;
+
+ client_body_buffer_size 256k;
+
+ # Nars are already compressed.
+ gzip off;
+
+ }
+
location = /latest-database-dump {
proxy_pass http://nar-herder;
}
diff --git a/scripts/nar-herder.in b/scripts/nar-herder.in
index 812ec98..921cc70 100644
--- a/scripts/nar-herder.in
+++ b/scripts/nar-herder.in
@@ -125,6 +125,14 @@
(define %import-options-defaults
'())
+(define %removal-options
+ (list (option '("immediate") #f #f
+ (lambda (opt name _ result)
+ (alist-cons 'immediate? #t result)))))
+
+(define %removal-options-defaults
+ '())
+
(define %server-options
(list (option '("port") #t #f
(lambda (opt name arg result)
@@ -495,8 +503,10 @@
(when (= 1 len)
(simple-format (current-error-port) "imported narinfo\n")))))
(("remove" rest ...)
- (let* ((opts (parse-options %base-options
- %base-option-defaults
+ (let* ((opts (parse-options (append %base-options
+ %removal-options)
+ (append %base-option-defaults
+ %removal-options-defaults)
rest))
(metrics-registry
(make-metrics-registry #:namespace
@@ -534,40 +544,84 @@
database
(assq-ref narinfo-details 'id))))
- (log-msg 'INFO "removing " store-path)
-
- (if (assq-ref opts 'storage)
+ (if (assq-ref opts 'immediate?)
(begin
- (remove-nar-files-by-hash
- database
- (assq-ref opts 'storage)
- metrics-registry
- (store-path-hash-part store-path)
- #:error-unless-files-to-remove? #f))
- (log-msg
- 'WARN "no --storage set, so just removing from the database"))
+ (log-msg 'INFO "removing " store-path)
+
+ (if (assq-ref opts 'storage)
+ (begin
+ (remove-nar-files-by-hash
+ database
+ (assq-ref opts 'storage)
+ metrics-registry
+ (store-path-hash-part store-path)
+ #:error-unless-files-to-remove? #f))
+ (log-msg
+ 'WARN "no --storage set, so just removing from the database"))
+
+ (for-each
+ (lambda (cached-narinfo-details)
+ ;; It might not have been scheduled for
+ ;; removal, but remove any schedule that
+ ;; exists
+ (database-delete-scheduled-cached-narinfo-removal
+ database
+ (assq-ref cached-narinfo-details 'id))
- (for-each
- (lambda (cached-narinfo-details)
- ;; It might not have been scheduled for
- ;; removal, but remove any schedule that
- ;; exists
- (database-delete-scheduled-cached-narinfo-removal
- database
- (assq-ref cached-narinfo-details 'id))
+ ;; Remove all the database entries first, as
+ ;; that'll stop these files appearing in narinfos
+ (database-remove-cached-narinfo-file
+ database
+ (assq-ref narinfo-details 'id)
+ (symbol->string
+ (assq-ref cached-narinfo-details 'compression))))
+ cached-narinfo-files)
- ;; Remove all the database entries first, as
- ;; that'll stop these files appearing in narinfos
- (database-remove-cached-narinfo-file
- database
- (assq-ref narinfo-details 'id)
- (symbol->string
- (assq-ref cached-narinfo-details 'compression))))
- cached-narinfo-files)
+ (database-remove-narinfo database store-path))
+ (begin
+ (log-msg 'INFO "scheduling removal for " store-path)
- (database-remove-narinfo database store-path))
+ (database-insert-scheduled-narinfo-removal
+ database
+ (assq-ref narinfo-details 'id)))))
(log-msg 'WARN store-path " not found to remove"))))
(assq-ref opts 'arguments))))
+ (("list" rest ...)
+ (let* ((opts (parse-options (append %base-options
+ %list-options)
+ (append %base-option-defaults
+ %list-options-defaults)
+ rest))
+ (metrics-registry
+ (make-metrics-registry #:namespace
+ "narherder"))
+ (database (setup-database
+ (assq-ref opts 'database)
+ metrics-registry))
+ (lgr (make <logger>))
+ (port-log (make <port-log>
+ #:port (current-output-port)
+ #:formatter
+ ;; In guile-lib v0.2.8 onwards, the formatter is
+ ;; called with more arguments
+ (lambda args ; lvl, time, str
+ (format #f "~a (~5a): ~a~%"
+ (strftime "%F %H:%M:%S" (localtime
+ (second args)))
+ (first args)
+ (third args))))))
+
+ (add-handler! lgr port-log)
+ (open-log! lgr)
+ (set-default-logger! lgr)
+
+ ;; added_at more than 6 months ago
+ ;; tagged with the unknown-if-for-master tag
+ ;; not scheduled for removal.
+ ;; limit 1000
+
+ #f
+ )
(("check" rest ...)
(let* ((opts (parse-options (append %base-options
%check-options)
@@ -607,9 +661,12 @@
(canonical-storage (and=> (assq-ref opts 'storage)
canonicalize-path)))
- (check-storage database
- canonical-storage
- metrics-registry))))
+ (database-delete-scheduled-cached-narinfo-removal
+ database
+ 1))))
+ ;; (check-storage database
+ ;; canonical-storage
+ ;; metrics-registry))))
(("run-server" rest ...)
(simple-format (current-error-port) "locale is ~A\n" (check-locale!))