From bb357c509e1c017e1fef5aa5f4d05beea0c25157 Mon Sep 17 00:00:00 2001 From: Danny Milosavljevic Date: Sun, 3 May 2020 21:40:04 +0200 Subject: file-systems: Fix UTF-16 handling in initrd. Follow-up to f73f4b3a2d7a313a6cb1667bd69205ea4b09f57c. * gnu/build/file-systems.scm (bytevector->u16-list): New procedure. (utf16->string): New procedure. --- gnu/build/file-systems.scm | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gnu/build/file-systems.scm b/gnu/build/file-systems.scm index 4ac672d96f..b920e8fc62 100644 --- a/gnu/build/file-systems.scm +++ b/gnu/build/file-systems.scm @@ -110,6 +110,31 @@ NUL terminator, return the size of the bytevector." (loop (+ index 2))) length)))) +(define* (bytevector->u16-list bv endianness #:optional (index 0)) + (if (< index (bytevector-length bv)) + (cons (bytevector-u16-ref bv index endianness) + (bytevector->u16-list bv endianness (+ index 2))) + '())) + +;; The initrd doesn't have iconv data, so do the conversion ourselves. +(define (utf16->string bv endianness) + (list->string + (map integer->char + (reverse + (let loop ((remainder (bytevector->u16-list bv endianness)) + (result '())) + (match remainder + (() result) + ((a) (cons a result)) + ((a b x ...) + (if (and (>= a #xD800) (< a #xDC00) ; high surrogate + (>= b #xDC00) (< b #xE000)) ; low surrogate + (loop x (cons (+ #x10000 + (* #x400 (- a #xD800)) + (- b #xDC00)) + result)) + (loop (cons b x) (cons a result)))))))))) + (define (null-terminated-utf16->string bv endianness) (utf16->string (sub-bytevector bv 0 (bytevector-utf16-length bv)) endianness)) -- cgit v1.2.3