From 3dfa9212f1a492103e3c639ab16e7ccfdddc9134 Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Fri, 12 Jul 2019 20:02:44 +0100 Subject: Kill long running load new revision jobs There are some revisions of Guix which take forever to process (or days at least). To avoid jobs being processed forever, kill them after they've been running for a while (default 24 hours). --- guix-data-service/jobs.scm | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'guix-data-service') diff --git a/guix-data-service/jobs.scm b/guix-data-service/jobs.scm index 2d515ca..a63821f 100644 --- a/guix-data-service/jobs.scm +++ b/guix-data-service/jobs.scm @@ -21,10 +21,15 @@ 4)) 1)) +(define default-timeout + (* (* 60 60) ;; 1 hour in seconds + 24)) + (define* (process-jobs-concurrently fetch-new-jobs process-job #:key (max-processes - default-max-processes)) + default-max-processes) + (timeout default-timeout)) (define processes (make-hash-table)) @@ -44,9 +49,10 @@ "\n" (string-concatenate (hash-map->list - (lambda (pid job-args) - (format #f " pid: ~5d job args: ~a\n" - pid job-args)) + (match-lambda* + ((pid (start-time job-args)) + (format #f " pid: ~5d job args: ~a\n" + pid job-args))) processes)) "\n"))) @@ -59,17 +65,32 @@ ;; No process to wait for #f) ((pid . status) - (let ((job-args (hashv-ref processes pid))) - (hashv-remove! processes pid) - (simple-format - (current-error-port) - "pid ~A failed with status ~A\n" - pid status)) + (hashv-remove! processes pid) + (simple-format (current-error-port) + "pid ~A failed with status ~A\n" + pid status) + + ;; Recurse, to check for other finished processes. (wait-on-processes)))) (lambda (key . args) (simple-format #t "key ~A args ~A\n" key args)))) + (define (kill-long-running-processes) + (hash-map->list + (match-lambda* + ((pid (start-time job-args)) + (let ((running-for + (- (current-time) start-time))) + (when (> running-for timeout) + (display + (simple-format + #f "sending SIGTERM to pid ~A started at ~A, now running for ~A\n" + pid start-time running-for) + (current-error-port)) + (kill pid SIGTERM))))) + processes)) + (define (fork-and-process-job job-args) (match (primitive-fork) (0 @@ -80,10 +101,12 @@ (lambda () (primitive-exit 127)))) (pid - (hashv-set! processes pid job-args) + (hashv-set! processes pid + (list (current-time) job-args)) #t))) (while #t + (kill-long-running-processes) (wait-on-processes) (display-status) (match (fetch-new-jobs) -- cgit v1.2.3