about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christopher Baines <mail@cbaines.net> 2023-05-05 19:19:31 +0100
committer: Christopher Baines <mail@cbaines.net> 2023-05-05 20:17:52 +0100
commit: 093af3f90b986cb3f3dc387e307b6e08b09a1c9f (patch)
tree: f7866120ca7130062ebac798cfbc6813210178ab
parent: ab4b35fe4cd998a22a6c678628ed23c28b2e3a50 (diff)
download: build-coordinator-093af3f90b986cb3f3dc387e307b6e08b09a1c9f.tar
download: build-coordinator-093af3f90b986cb3f3dc387e307b6e08b09a1c9f.tar.gz
Include system uptime in the agent status information
I've found this useful for spotting systems which have problems.
-rw-r--r-- guix-build-coordinator/agent-messaging/http.scm | 6
-rw-r--r-- guix-build-coordinator/agent-messaging/http/server.scm | 2
-rw-r--r-- guix-build-coordinator/agent.scm | 2
-rw-r--r-- guix-build-coordinator/coordinator.scm | 4
-rw-r--r-- guix-build-coordinator/datastore/sqlite.scm | 6
-rw-r--r-- sqitch/pg/deploy/system_uptime.sql | 7
-rw-r--r-- sqitch/pg/revert/system_uptime.sql | 7
-rw-r--r-- sqitch/pg/verify/system_uptime.sql | 7
-rw-r--r-- sqitch/sqitch.plan | 1
-rw-r--r-- sqitch/sqlite/deploy/system_uptime.sql | 16
-rw-r--r-- sqitch/sqlite/revert/system_uptime.sql | 7
-rw-r--r-- sqitch/sqlite/verify/system_uptime.sql | 7
12 files changed, 68 insertions, 4 deletions
diff --git a/guix-build-coordinator/agent-messaging/http.scm b/guix-build-coordinator/agent-messaging/http.scm
index a76fe90..d3e4fcd 100644
--- a/guix-build-coordinator/agent-messaging/http.scm
+++ b/guix-build-coordinator/agent-messaging/http.scm
@@ -247,7 +247,8 @@
.
args)
(apply
- (lambda* (status #:key 1min-load-average (log default-log)
+ (lambda* (status #:key 1min-load-average system-uptime
+ (log default-log)
initial-status-update?)
(coordinator-http-request
log
@@ -260,6 +261,9 @@
`((load_average
. (("1" . ,1min-load-average))))
'())
+ ,@(if system-uptime
+ `((system_uptime . ,system-uptime))
+ '())
,@(if initial-status-update?
`((initial_status_update . #t))
'()))))
diff --git a/guix-build-coordinator/agent-messaging/http/server.scm b/guix-build-coordinator/agent-messaging/http/server.scm
index d2f4e2d..0ca97a6 100644
--- a/guix-build-coordinator/agent-messaging/http/server.scm
+++ b/guix-build-coordinator/agent-messaging/http/server.scm
@@ -260,12 +260,14 @@ port. Also, the port used can be changed by passing the --port option.\n"
(1min-load-average (and=> (assoc-ref json-body "load_average")
(lambda (load-average)
(assoc-ref load-average "1"))))
+ (system-uptime (assoc-ref json-body "system_uptime"))
(processor-count (assoc-ref json-body "processor_count"))
(initial-status-update
(eq? #t (assoc-ref json-body "initial_status_update"))))
(update-agent-status build-coordinator uuid
status 1min-load-average
+ system-uptime
processor-count
#:initial-status-update?
initial-status-update))
diff --git a/guix-build-coordinator/agent.scm b/guix-build-coordinator/agent.scm
index b05583f..ab81b03 100644
--- a/guix-build-coordinator/agent.scm
+++ b/guix-build-coordinator/agent.scm
@@ -490,6 +490,7 @@
'idle
#:1min-load-average
(get-load-average #:period 1)
+ #:system-uptime (get-uptime)
#:initial-status-update? #t
#:log (build-log-procedure lgr))))
(for-each
@@ -534,6 +535,7 @@
'active)
#:1min-load-average
(get-load-average #:period 1)
+ #:system-uptime (get-uptime)
#:log (build-log-procedure lgr)))
#:unwind? #t)
diff --git a/guix-build-coordinator/coordinator.scm b/guix-build-coordinator/coordinator.scm
index f2d0089..b39c4d1 100644
--- a/guix-build-coordinator/coordinator.scm
+++ b/guix-build-coordinator/coordinator.scm
@@ -861,7 +861,7 @@
(define* (update-agent-status coordinator agent-uuid
status 1min-load-average
- processor-count
+ system-uptime processor-count
#:key initial-status-update?)
(define datastore
(build-coordinator-datastore coordinator))
@@ -886,6 +886,7 @@
agent-uuid
status
1min-load-average
+ system-uptime
processor-count)
(build-coordinator-send-event
@@ -895,6 +896,7 @@
(status . ,status)
(load_average
. ((1 . ,1min-load-average)))
+ (system_uptime . ,system-uptime)
(processor_count . ,processor-count))))
(define (trigger-build-allocation build-coordinator)
diff --git a/guix-build-coordinator/datastore/sqlite.scm b/guix-build-coordinator/datastore/sqlite.scm
index f94e2ee..069a18a 100644
--- a/guix-build-coordinator/datastore/sqlite.scm
+++ b/guix-build-coordinator/datastore/sqlite.scm
@@ -827,6 +827,7 @@ WHERE agent_id = :agent_id"
agent-uuid
status
1min-load-average
+ system-uptime
processor-count)
(call-with-worker-thread
(slot-ref datastore 'worker-writer-thread-channel)
@@ -846,14 +847,15 @@ DELETE FROM agent_status WHERE agent_id = :uuid"
(sqlite-prepare
db
"
-INSERT INTO agent_status (agent_id, status, load_average_1min, processor_count)
- VALUES (:uuid, :status, :load, :processor_count)"
+INSERT INTO agent_status (agent_id, status, load_average_1min, system_uptime, processor_count)
+ VALUES (:uuid, :status, :load, :system_uptime, :processor_count)"
#:cache? #t)))
(sqlite-bind-arguments statement
#:uuid agent-uuid
#:status status
#:load 1min-load-average
+ #:system_uptime system-uptime
#:processor_count processor-count)
(sqlite-step-and-reset statement))))
diff --git a/sqitch/pg/deploy/system_uptime.sql b/sqitch/pg/deploy/system_uptime.sql
new file mode 100644
index 0000000..916781c
--- /dev/null
+++ b/sqitch/pg/deploy/system_uptime.sql
@@ -0,0 +1,7 @@
+-- Deploy guix-build-coordinator:system_uptime to pg
+
+BEGIN;
+
+-- NOTE(review): no DDL is run here, so this deploy is an empty transaction; confirm whether the pg schema also needs a system_uptime column.
+
+COMMIT;
diff --git a/sqitch/pg/revert/system_uptime.sql b/sqitch/pg/revert/system_uptime.sql
new file mode 100644
index 0000000..2c74279
--- /dev/null
+++ b/sqitch/pg/revert/system_uptime.sql
@@ -0,0 +1,7 @@
+-- Revert guix-build-coordinator:system_uptime from pg
+
+BEGIN;
+
+-- NOTE(review): no DDL is run here; this matches the pg deploy script, which also makes no schema change.
+
+COMMIT;
diff --git a/sqitch/pg/verify/system_uptime.sql b/sqitch/pg/verify/system_uptime.sql
new file mode 100644
index 0000000..5676906
--- /dev/null
+++ b/sqitch/pg/verify/system_uptime.sql
@@ -0,0 +1,7 @@
+-- Verify guix-build-coordinator:system_uptime on pg
+
+BEGIN;
+
+-- NOTE(review): no checks are run here, so this verify step always succeeds.
+
+ROLLBACK;
diff --git a/sqitch/sqitch.plan b/sqitch/sqitch.plan
index 8c7d459..c7604f4 100644
--- a/sqitch/sqitch.plan
+++ b/sqitch/sqitch.plan
@@ -44,3 +44,4 @@ build_results_counts 2022-10-28T09:36:35Z Chris <chris@felis> # Add build_result
replace_agent_status 2023-03-22T14:17:35Z Chris <chris@felis> # Replace agent_status
agent_status_add_processor_count 2023-03-24T09:28:47Z Chris <chris@felis> # Add agent_status.processor_count
remove_build_allocation_plan 2023-04-23T19:50:23Z Chris <chris@felis> # Remove build_allocation_plan
+system_uptime 2023-05-05T18:18:35Z Chris <chris@felis> # Add system uptime
diff --git a/sqitch/sqlite/deploy/system_uptime.sql b/sqitch/sqlite/deploy/system_uptime.sql
new file mode 100644
index 0000000..80214b0
--- /dev/null
+++ b/sqitch/sqlite/deploy/system_uptime.sql
@@ -0,0 +1,16 @@
+-- Deploy guix-build-coordinator:system_uptime to sqlite
+
+BEGIN;
+
+DROP TABLE agent_status; -- recreated below with the extra column; any existing rows are discarded
+
+CREATE TABLE agent_status (
+ agent_id TEXT PRIMARY KEY ASC REFERENCES agents (id),
+ timestamp DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ status TEXT NOT NULL,
+ load_average_1min INTEGER,
+ system_uptime INTEGER, -- column added by this change; nullable. Units are not shown here (presumably seconds; confirm against the agent)
+ processor_count INTEGER
+);
+
+COMMIT;
diff --git a/sqitch/sqlite/revert/system_uptime.sql b/sqitch/sqlite/revert/system_uptime.sql
new file mode 100644
index 0000000..418f77a
--- /dev/null
+++ b/sqitch/sqlite/revert/system_uptime.sql
@@ -0,0 +1,7 @@
+-- Revert guix-build-coordinator:system_uptime from sqlite
+
+BEGIN;
+
+-- NOTE(review): no DDL is run here, so reverting leaves agent_status.system_uptime (added by the deploy script) in place.
+
+COMMIT;
diff --git a/sqitch/sqlite/verify/system_uptime.sql b/sqitch/sqlite/verify/system_uptime.sql
new file mode 100644
index 0000000..0c4870c
--- /dev/null
+++ b/sqitch/sqlite/verify/system_uptime.sql
@@ -0,0 +1,7 @@
+-- Verify guix-build-coordinator:system_uptime on sqlite
+
+BEGIN;
+
+-- NOTE(review): no checks are run here, so the presence of agent_status.system_uptime is not actually verified.
+
+ROLLBACK;