summaryrefslogtreecommitdiff
path: root/guix
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2015-03-05 22:00:11 +0100
committerLudovic Courtès <ludo@gnu.org>2015-03-05 22:17:36 +0100
commitce72c780746776a86f59747f5eff8731cb4ff39b (patch)
tree72b3bd889fd71a4eeeca33811a56ec63cec52e1f /guix
parent472e4c430343671a6cb4e5ed392beae04ef09da6 (diff)
downloadgnu-guix-ce72c780746776a86f59747f5eff8731cb4ff39b.tar
gnu-guix-ce72c780746776a86f59747f5eff8731cb4ff39b.tar.gz
store: Attempt to decode build logs as UTF-8.
* guix/serialization.scm (read-maybe-utf8-string): New procedure. * guix/store.scm (process-stderr): Use it for the build log and errors. * tests/store.scm ("current-build-output-port, UTF-8", "current-build-output-port, UTF-8 + garbage"): New tests.
Diffstat (limited to 'guix')
-rw-r--r--guix/serialization.scm18
-rw-r--r--guix/store.scm9
2 files changed, 23 insertions, 4 deletions
diff --git a/guix/serialization.scm b/guix/serialization.scm
index da01ff39f5..a99f53ee0b 100644
--- a/guix/serialization.scm
+++ b/guix/serialization.scm
@@ -29,7 +29,8 @@
#:export (write-int read-int
write-long-long read-long-long
write-padding
- write-string read-string read-latin1-string
+ write-string
+ read-string read-latin1-string read-maybe-utf8-string
write-string-list read-string-list
write-string-pairs
write-store-path read-store-path
@@ -130,6 +131,21 @@
;; upgraded to Guile >= 2.0.9.
(list->string (map integer->char (bytevector->u8-list bv)))))
+(define (read-maybe-utf8-string p)
+ "Read a serialized string from port P. Attempt to decode it as UTF-8 and
+substitute invalid byte sequences with question marks. This is a
+\"permissive\" UTF-8 decoder."
+ ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
+ ;; and substitute invalid byte sequences with question marks, but this is
+ ;; not very efficient. Eventually Guile may provide a lightweight
+ ;; permissive UTF-8 decoder.
+ (let* ((bv (read-byte-string p))
+ (port (with-fluids ((%default-port-encoding "UTF-8")
+ (%default-port-conversion-strategy
+ 'substitute))
+ (open-bytevector-input-port bv))))
+ (get-string-all port)))
+
(define (write-string-list l p)
(write-int (length l) p)
(for-each (cut write-string <> p) l))
diff --git a/guix/store.scm b/guix/store.scm
index d88fb3ea54..a3f3cbf43b 100644
--- a/guix/store.scm
+++ b/guix/store.scm
@@ -418,15 +418,18 @@ encoding conversion errors."
(write-padding len p)
#f))
((= k %stderr-next)
- ;; Log a string.
- (let ((s (read-latin1-string p)))
+ ;; Log a string. Build logs are usually UTF-8-encoded, but they
+ ;; may also contain arbitrary byte sequences that should not cause
+ ;; this to fail. Thus, use the permissive
+ ;; 'read-maybe-utf8-string'.
+ (let ((s (read-maybe-utf8-string p)))
(display s (current-build-output-port))
(when (string-any %newlines s)
(flush-output-port (current-build-output-port)))
#f))
((= k %stderr-error)
;; Report an error.
- (let ((error (read-latin1-string p))
+ (let ((error (read-maybe-utf8-string p))
;; Currently the daemon fails to send a status code for early
;; errors like DB schema version mismatches, so check for EOF.
(status (if (and (>= (nix-server-minor-version server) 8)