From 0f7d0743edb0922988531ac5bbb04026c57492eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Fri, 16 Oct 2020 22:51:30 +0200 Subject: doc: Move manual index creation to a separate derivation. * doc/build.scm (normalize-language-code, html-manual-identifier-index): New procedures. (syntax-highlighted-html): Add #:mono-node-indexes and #:split-node-indexes. [build](underscore-decode, anchor-id->key, collect-anchors): Remove. (language+node-anchors, mono-node-anchors, multi-node-anchors): New variables. Use them. --- doc/build.scm | 305 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 207 insertions(+), 98 deletions(-) diff --git a/doc/build.scm b/doc/build.scm index 26ff577d5d..980d11ccf1 100644 --- a/doc/build.scm +++ b/doc/build.scm @@ -40,6 +40,7 @@ (gnu packages iso-codes) (gnu packages texinfo) (gnu packages tex) + (ice-9 match) (srfi srfi-19) (srfi srfi-71)) @@ -204,9 +205,168 @@ content=\"width=device-width, initial-scale=1\" />")) (setenv "XFAIL_TESTS" "htmlprag.scm") #t)))))))) +(define (normalize-language-code language) ;XXX: deduplicate + ;; Normalize LANGUAGE. For instance, "zh_CN" becomes "zh-cn". + (string-map (match-lambda + (#\_ #\-) + (chr chr)) + (string-downcase language))) + +(define* (html-manual-identifier-index manual base-url + #:key + (name "html-manual-identifier-index")) + "Return an index of all the identifiers that appear in MANUAL, a +makeinfo-generated manual. The index is a file that contains an alist; each +key is an identifier and the associated value is the URL reference pointing to +that identifier. The URL is constructed by concatenating BASE-URL to the +actual file name." + (define build + (with-extensions (list guile-lib/htmlprag-fixed) + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils) + (htmlprag) + (srfi srfi-1) + (srfi srfi-26) + (ice-9 ftw) + (ice-9 match) + (ice-9 threads) + (ice-9 pretty-print)) + + (define file-url + (let ((prefix (string-append #$manual "/"))) + (lambda (file) + ;; Return the URL for FILE. + (let ((file (string-drop file (string-length prefix))) + (base #$base-url)) + (if (string-null? base) + file + (string-append base "/" file)))))) + + (define (underscore-decode str) + ;; Decode STR, an "underscore-encoded" string as produced by + ;; makeinfo for indexes, such as "_0025base_002dservices" for + ;; "%base-services". + (let loop ((str str) + (result '())) + (match (string-index str #\_) + (#f + (string-concatenate-reverse (cons str result))) + (index + (let ((char (string->number + (substring str (+ index 1) (+ index 5)) + 16))) + (loop (string-drop str (+ index 5)) + (append (list (string (integer->char char)) + (string-take str index)) + result))))))) + + (define (anchor-id->key id) + ;; Convert ID, an anchor ID such as + ;; "index-pam_002dlimits_002dservice" to the corresponding key, + ;; "pam-limits-service" in this example. Drop the suffix of + ;; duplicate anchor IDs like "operating_002dsystem-1". + (let ((id (if (any (cut string-suffix? <> id) + '("-1" "-2" "-3" "-4" "-5")) + (string-drop-right id 2) + id))) + (underscore-decode + (string-drop id (string-length "index-"))))) + + (define* (collect-anchors file #:optional (anchors '())) + ;; Collect the anchors that appear in FILE, a makeinfo-generated + ;; file. Grab those from
tags, which corresponds to + ;; Texinfo @deftp, @defvr, etc. Return ANCHORS augmented with + ;; more name/reference pairs. + (define string-or-entity? + (match-lambda + ((? string?) #t) + (('*ENTITY* _ ...) #t) + (_ #f))) + + (define (worthy-entry? lst) + ;; Attempt to match: + ;; Scheme Variable: x + ;; but not: + ;; cups-configuration parameter: … + (let loop ((lst lst)) + (match lst + (((? string-or-entity?) rest ...) + (loop rest)) + ((('strong _ ...) _ ...) + #t) + (_ #f)))) + + (let ((shtml (call-with-input-file file html->shtml))) + (let loop ((shtml shtml) + (anchors anchors)) + (match shtml + (('dt ('@ ('id id)) rest ...) + (if (and (string-prefix? "index-" id) + (worthy-entry? rest)) + (alist-cons (anchor-id->key id) + (string-append (file-url file) + "#" id) + anchors) + anchors)) + ((tag ('@ _ ...) body ...) + (fold loop anchors body)) + ((tag body ...) + (fold loop anchors body)) + (_ anchors))))) + + (define (html-files directory) + ;; Return the list of HTML files under DIRECTORY. + (map (cut string-append directory "/" <>) + (scandir #$manual (lambda (file) + (string-suffix? ".html" file))))) + + (define anchors + (sort (concatenate + (n-par-map (parallel-job-count) + (cut collect-anchors <>) + (html-files #$manual))) + (match-lambda* + (((key1 . url1) (key2 . url2)) + (if (string=? key1 key2) + (string blocks (as produced by 'makeinfo --html')." ((? string? str) str)))) - (define (underscore-decode str) - ;; Decode STR, an "underscore-encoded" string as produced by - ;; makeinfo for indexes, such as "_0025base_002dservices" for - ;; "%base-services". - (let loop ((str str) - (result '())) - (match (string-index str #\_) - (#f - (string-concatenate-reverse (cons str result))) - (index - (let ((char (string->number - (substring str (+ index 1) (+ index 5)) - 16))) - (loop (string-drop str (+ index 5)) - (append (list (string (integer->char char)) - (string-take str index)) - result))))))) - - (define (anchor-id->key id) - ;; Convert ID, an anchor ID such as - ;; "index-pam_002dlimits_002dservice" to the corresponding key, - ;; "pam-limits-service" in this example. Drop the suffix of - ;; duplicate anchor IDs like "operating_002dsystem-1". - (let ((id (if (any (cut string-suffix? <> id) - '("-1" "-2" "-3" "-4" "-5")) - (string-drop-right id 2) - id))) - (underscore-decode - (string-drop id (string-length "index-"))))) - - (define* (collect-anchors file #:optional (vhash vlist-null)) - ;; Collect the anchors that appear in FILE, a makeinfo-generated - ;; file. Grab those from
tags, which corresponds to - ;; Texinfo @deftp, @defvr, etc. Return VHASH augmented with - ;; more name/reference pairs. - (define string-or-entity? - (match-lambda - ((? string?) #t) - (('*ENTITY* _ ...) #t) - (_ #f))) - - (define (worthy-entry? lst) - ;; Attempt to match: - ;; Scheme Variable: x - ;; but not: - ;; cups-configuration parameter: … - (let loop ((lst lst)) - (match lst - (((? string-or-entity?) rest ...) - (loop rest)) - ((('strong _ ...) _ ...) - #t) - (_ #f)))) - - (let ((shtml (call-with-input-file file html->shtml))) - (let loop ((shtml shtml) - (vhash vhash)) - (match shtml - (('dt ('@ ('id id)) rest ...) - (if (and (string-prefix? "index-" id) - (worthy-entry? rest)) - (vhash-cons (anchor-id->key id) - (string-append (basename file) - "#" id) - vhash) - vhash)) - ((tag ('@ _ ...) body ...) - (fold loop vhash body)) - ((tag body ...) - (fold loop vhash body)) - (_ vhash))))) - (define (process-html file anchors) ;; Parse FILE and perform syntax highlighting for its Scheme ;; snippets. Install the result to #$output. @@ -444,38 +532,59 @@ its
 blocks (as produced by 'makeinfo --html')."
             (define (html? file stat)
               (string-suffix? ".html" file))
 
+            (define language+node-anchors
+              (match-lambda
+                ((language files ...)
+                 (cons language
+                       (fold (lambda (file vhash)
+                               (let ((alist (call-with-input-file file read)))
+                                 ;; Use 'fold-right' so that the first entry
+                                 ;; wins (e.g., "car" from "Pairs" rather than
+                                 ;; from "rnrs base" in the Guile manual).
+                                 (fold-right (match-lambda*
+                                               (((key . value) vhash)
+                                                (vhash-cons key value vhash)))
+                                             vhash
+                                             alist)))
+                             vlist-null
+                             files)))))
+
+            (define mono-node-anchors
+              ;; List of language/vhash pairs, where each vhash maps an
+              ;; identifier to the corresponding URL in a single-page manual.
+              (map language+node-anchors '#$mono-node-indexes))
+
+            (define multi-node-anchors
+              ;; Likewise for split-node manuals.
+              (map language+node-anchors '#$split-node-indexes))
+
             ;; Install a UTF-8 locale so we can process UTF-8 files.
             (setenv "GUIX_LOCPATH"
                     #+(file-append glibc-utf8-locales "/lib/locale"))
             (setlocale LC_ALL "en_US.utf8")
 
             ;; First process the mono-node 'guix.html' files.
-            (n-par-for-each (parallel-job-count)
-                            (lambda (mono)
-                              (let ((anchors (collect-anchors mono)))
-                                (process-html mono anchors)))
-                            (find-files
-                             #$input
-                             "^guix(-cookbook|)(\\.[a-zA-Z_-]+)?\\.html$"))
-
-            ;; Next process the multi-node HTML files in two phases: (1)
-            ;; collect the list of anchors, and (2) perform
-            ;; syntax-highlighting.
-            (let* ((multi   (find-files #$input "^html_node$"
-                                        #:directories? #t))
-                   (anchors (n-par-map (parallel-job-count)
-                                       (lambda (multi)
-                                         (cons multi
-                                               (fold collect-anchors vlist-null
-                                                     (find-files multi html?))))
-                                       multi)))
-              (n-par-for-each (parallel-job-count)
-                              (lambda (file)
-                                (let ((anchors (assoc-ref anchors (dirname file))))
-                                  (process-html file anchors)))
-                              (append-map (lambda (multi)
-                                            (find-files multi html?))
-                                          multi)))
+            (for-each (match-lambda
+                        ((language . anchors)
+                         (let ((files (find-files
+                                       (string-append #$input "/" language)
+                                       "^guix(-cookbook|)(\\.[a-zA-Z_-]+)?\\.html$")))
+                           (n-par-for-each (parallel-job-count)
+                                           (cut process-html <> anchors)
+                                           files))))
+                      mono-node-anchors)
+
+            ;; Process the multi-node HTML files.
+            (for-each (match-lambda
+                        ((language . anchors)
+                         (let ((files (find-files
+                                       (string-append #$input "/" language
+                                                      "/html_node")
+                                       "\\.html$")))
+                           (n-par-for-each (parallel-job-count)
+                                           (cut process-html <> anchors)
+                                           files))))
+                      multi-node-anchors)
 
             ;; Last, copy non-HTML files as is.
             (for-each copy-as-is
-- 
cgit v1.2.3