diff options
author | Ludovic Courtès <ludo@gnu.org> | 2018-10-09 09:53:02 +0200 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2018-10-09 18:54:12 +0200 |
commit | fe17037b387c6eca0c45f0526d2761e982a192bb (patch) | |
tree | 72a5117986a4f5d983c9e3bf360ad0d8845eddd4 | |
parent | 276f368051ff52cf202ede9fce579e49d9d744ec (diff) | |
download | patches-fe17037b387c6eca0c45f0526d2761e982a192bb.tar patches-fe17037b387c6eca0c45f0526d2761e982a192bb.tar.gz |
status: Gracefully handle invalid UTF-8 in build logs.
* guix/status.scm (maybe-utf8->string): New procedure.
(build-event-output-port): Use it in lieu of 'utf8->string'.
* tests/status.scm ("build-output-port, UTF-8")
("current-build-output-port, UTF-8 + garbage"): New tests.
-rw-r--r-- | guix/status.scm | 19 | ||||
-rw-r--r-- | tests/status.scm | 22 |
2 files changed, 39 insertions, 2 deletions
diff --git a/guix/status.scm b/guix/status.scm index c6956066fd..13537c70cd 100644 --- a/guix/status.scm +++ b/guix/status.scm @@ -34,6 +34,7 @@ #:use-module (ice-9 match) #:use-module (ice-9 format) #:use-module (ice-9 binary-ports) + #:autoload (ice-9 rdelim) (read-string) #:use-module (rnrs bytevectors) #:use-module ((system foreign) #:select (bytevector->pointer pointer->bytevector)) @@ -429,6 +430,22 @@ ON-CHANGE can display the build status, build events, etc." (define %newline (char-set #\return #\newline)) +(define (maybe-utf8->string bv) + "Attempt to decode BV as UTF-8 string and return it. Gracefully handle the +case where BV does not contain only valid UTF-8." + (catch 'decoding-error + (lambda () + (utf8->string bv)) + (lambda _ + ;; This is the sledgehammer but it's the only safe way we have to + ;; properly handle this. It's expensive but it's rarely needed. + (let ((port (open-bytevector-input-port bv))) + (set-port-encoding! port "UTF-8") + (set-port-conversion-strategy! port 'substitute) + (let ((str (read-string port))) + (close-port port) + str))))) + (define* (build-event-output-port proc #:optional (seed (build-status))) "Return an output port for use as 'current-build-output-port' that calls PROC with its current state value, initialized with SEED, on every build @@ -464,7 +481,7 @@ The second return value is a thunk to retrieve the current state." (pointer->bytevector ptr count))) (define (write! bv offset count) - (let loop ((str (utf8->string (bytevector-range bv offset count)))) + (let loop ((str (maybe-utf8->string (bytevector-range bv offset count)))) (match (string-index str %newline) ((? integer? cr) (let ((tail (string-take str (+ 1 cr)))) diff --git a/tests/status.scm b/tests/status.scm index 04dedb702c..486ad04dd2 100644 --- a/tests/status.scm +++ b/tests/status.scm @@ -20,7 +20,9 @@ #:use-module (guix status) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) - #:use-module (srfi srfi-64)) + #:use-module (srfi srfi-64) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io ports)) (test-begin "status") @@ -112,4 +114,22 @@ (display "@ substituter-succeeded baz\n" port) (list first (get-status))))) +(test-equal "build-output-port, UTF-8" + '((build-log "lambda is λ!\n")) + (let-values (((port get-status) (build-event-output-port cons '())) + ((bv) (string->utf8 "lambda is λ!\n"))) + (put-bytevector port bv) + (force-output port) + (get-status))) + +(test-equal "current-build-output-port, UTF-8 + garbage" + ;; What about a mixture of UTF-8 + garbage? + '((build-log "garbage: �lambda: λ\n")) + (let-values (((port get-status) (build-event-output-port cons '()))) + (display "garbage: " port) + (put-bytevector port #vu8(128)) + (put-bytevector port (string->utf8 "lambda: λ\n")) + (force-output port) + (get-status))) + (test-end "status") |