aboutsummaryrefslogtreecommitdiff
path: root/guix/swh.scm
diff options
context:
space:
mode:
Diffstat (limited to 'guix/swh.scm')
-rw-r--r--guix/swh.scm560
1 files changed, 560 insertions, 0 deletions
diff --git a/guix/swh.scm b/guix/swh.scm
new file mode 100644
index 0000000000..89cddb2bdd
--- /dev/null
+++ b/guix/swh.scm
@@ -0,0 +1,560 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix swh)
+ #:use-module (guix base16)
+ #:use-module (guix build utils)
+ #:use-module ((guix build syscalls) #:select (mkdtemp!))
+ #:use-module (web client)
+ #:use-module (web response)
+ #:use-module (json)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-9)
+ #:use-module (srfi srfi-11)
+ #:use-module (srfi srfi-19)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 regex)
+ #:use-module (ice-9 popen)
+ #:use-module ((ice-9 ftw) #:select (scandir))
+ #:export (origin?
+ origin-id
+ origin-type
+ origin-url
+ origin-visits
+ lookup-origin
+
+ visit?
+ visit-date
+ visit-origin
+ visit-url
+ visit-snapshot-url
+ visit-status
+ visit-number
+ visit-snapshot
+
+ branch?
+ branch-name
+ branch-target
+
+ release?
+ release-id
+ release-name
+ release-message
+ release-target
+
+ revision?
+ revision-id
+ revision-date
+ revision-directory
+ lookup-revision
+ lookup-origin-revision
+
+ content?
+ content-checksums
+ content-data-url
+ content-length
+ lookup-content
+
+ directory-entry?
+ directory-entry-name
+ directory-entry-type
+ directory-entry-checksums
+ directory-entry-length
+ directory-entry-permissions
+ lookup-directory
+ directory-entry-target
+
+ save-reply?
+ save-reply-origin-url
+ save-reply-origin-type
+ save-reply-request-date
+ save-reply-request-status
+ save-reply-task-status
+ save-origin
+ save-origin-status
+
+ vault-reply?
+ vault-reply-id
+ vault-reply-fetch-url
+ vault-reply-object-id
+ vault-reply-object-type
+ vault-reply-progress-message
+ vault-reply-status
+ query-vault
+ request-cooking
+ vault-fetch
+
+ swh-download))
+
+;;; Commentary:
+;;;
+;;; This module provides bindings to the HTTP interface of Software Heritage.
+;;; It allows you to browse the archive, look up revisions (such as SHA1
+;;; commit IDs), "origins" (code hosting URLs), content (files), etc. See
+;;; <https://archive.softwareheritage.org/api/> for more information.
+;;;
+;;; The high-level 'swh-download' procedure allows you to download a Git
+;;; revision from Software Heritage, provided it is available.
+;;;
+;;; Code:
+
+(define %swh-base-url
+ ;; Presumably we won't need to change it.
+ "https://archive.softwareheritage.org")
+
+(define (swh-url path . rest)
+ (define url
+ (string-append %swh-base-url path
+ (string-join rest "/" 'prefix)))
+
+ ;; Ensure there's a trailing slash or we get a redirect.
+ (if (string-suffix? "/" url)
+ url
+ (string-append url "/")))
+
+(define-syntax-rule (define-json-reader json->record ctor spec ...)
+ "Define JSON->RECORD as a procedure that converts a JSON representation,
+read from a port, string, or hash table, into a record created by CTOR and
+following SPEC, a series of field specifications."
+ (define (json->record input)
+ (let ((table (cond ((port? input)
+ (json->scm input))
+ ((string? input)
+ (json-string->scm input))
+ ((hash-table? input)
+ input))))
+ (let-syntax ((extract-field (syntax-rules ()
+ ((_ table (field key json->value))
+ (json->value (hash-ref table key)))
+ ((_ table (field key))
+ (hash-ref table key))
+ ((_ table (field))
+ (hash-ref table
+ (symbol->string 'field))))))
+ (ctor (extract-field table spec) ...)))))
+
+(define-syntax-rule (define-json-mapping rtd ctor pred json->record
+ (field getter spec ...) ...)
+ "Define RTD as a record type with the given FIELDs and GETTERs, à la SRFI-9,
+and define JSON->RECORD as a conversion from JSON to a record of this type."
+ (begin
+ (define-record-type rtd
+ (ctor field ...)
+ pred
+ (field getter) ...)
+
+ (define-json-reader json->record ctor
+ (field spec ...) ...)))
+
+(define %date-regexp
+ ;; Match strings like "2014-11-17T22:09:38+01:00" or
+ ;; "2018-09-30T23:20:07.815449+00:00"".
+ (make-regexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})((\\.[0-9]+)?)([+-][0-9]{2}):([0-9]{2})$"))
+
+(define (string->date* str)
+ "Return a SRFI-19 date parsed from STR, a date string as returned by
+Software Heritage."
+ ;; We can't use 'string->date' because of the timezone format: SWH returns
+ ;; "+01:00" when the '~z' template expects "+0100". So we roll our own!
+ (or (and=> (regexp-exec %date-regexp str)
+ (lambda (match)
+ (define (ref n)
+ (string->number (match:substring match n)))
+
+ (make-date (let ((ns (match:substring match 8)))
+ (if ns
+ (string->number (string-drop ns 1))
+ 0))
+ (ref 6) (ref 5) (ref 4)
+ (ref 3) (ref 2) (ref 1)
+ (+ (* 3600 (ref 9)) ;time zone
+ (if (< (ref 9) 0)
+ (- (ref 10))
+ (ref 10))))))
+ str)) ;oops!
+
+(define* (call url decode #:optional (method http-get)
+ #:key (false-if-404? #t))
+ "Invoke the endpoint at URL using METHOD. Decode the resulting JSON body
+using DECODE, a one-argument procedure that takes an input port. When
+FALSE-IF-404? is true, return #f upon 404 responses."
+ (let*-values (((response port)
+ (method url #:streaming? #t)))
+ ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
+ (match (assq-ref (response-headers response) 'x-ratelimit-remaining)
+ (#f #t)
+ ((? (compose zero? string->number))
+ (throw 'swh-error url response))
+ (_ #t))
+
+ (cond ((= 200 (response-code response))
+ (let ((result (decode port)))
+ (close-port port)
+ result))
+ ((and false-if-404?
+ (= 404 (response-code response)))
+ (close-port port)
+ #f)
+ (else
+ (close-port port)
+ (throw 'swh-error url response)))))
+
+(define-syntax define-query
+ (syntax-rules (path)
+ "Define a procedure that performs a Software Heritage query."
+ ((_ (name args ...) docstring (path components ...)
+ json->value)
+ (define (name args ...)
+ docstring
+ (call (swh-url components ...) json->value)))))
+
+;; <https://archive.softwareheritage.org/api/1/origin/git/url/https://github.com/guix-mirror/guix/>
+(define-json-mapping <origin> make-origin origin?
+ json->origin
+ (id origin-id)
+ (visits-url origin-visits-url "origin_visits_url")
+ (type origin-type)
+ (url origin-url))
+
+;; <https://archive.softwareheritage.org/api/1/origin/52181937/visits/>
+(define-json-mapping <visit> make-visit visit?
+ json->visit
+ (date visit-date "date" string->date*)
+ (origin visit-origin)
+ (url visit-url "origin_visit_url")
+ (snapshot-url visit-snapshot-url "snapshot_url")
+ (status visit-status)
+ (number visit-number "visit"))
+
+;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/>
+(define-json-mapping <snapshot> make-snapshot snapshot?
+ json->snapshot
+ (branches snapshot-branches "branches" json->branches))
+
+;; This is used for the "branches" field of snapshots.
+(define-record-type <branch>
+ (make-branch name target-type target-url)
+ branch?
+ (name branch-name)
+ (target-type branch-target-type) ;release | revision
+ (target-url branch-target-url))
+
+(define (json->branches branches)
+ (hash-map->list (lambda (key value)
+ (make-branch key
+ (string->symbol
+ (hash-ref value "target_type"))
+ (hash-ref value "target_url")))
+ branches))
+
+;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/>
+(define-json-mapping <release> make-release release?
+ json->release
+ (id release-id)
+ (name release-name)
+ (message release-message)
+ (target-type release-target-type "target_type" string->symbol)
+ (target-url release-target-url "target_url"))
+
+;; <https://archive.softwareheritage.org/api/1/revision/359fdda40f754bbf1b5dc261e7427b75463b59be/>
+(define-json-mapping <revision> make-revision revision?
+ json->revision
+ (id revision-id)
+ (date revision-date "date" string->date*)
+ (directory revision-directory)
+ (directory-url revision-directory-url "directory_url"))
+
+;; <https://archive.softwareheritage.org/api/1/content/>
+(define-json-mapping <content> make-content content?
+ json->content
+ (checksums content-checksums "checksums" json->checksums)
+ (data-url content-data-url "data_url")
+ (file-type-url content-file-type-url "filetype_url")
+ (language-url content-language-url "language_url")
+ (length content-length)
+ (license-url content-license-url "license_url"))
+
+(define (json->checksums checksums)
+ (hash-map->list (lambda (key value)
+ (cons key (base16-string->bytevector value)))
+ checksums))
+
+;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/>
+(define-json-mapping <directory-entry> make-directory-entry directory-entry?
+ json->directory-entry
+ (name directory-entry-name)
+ (type directory-entry-type "type"
+ (match-lambda
+ ("dir" 'directory)
+ (str (string->symbol str))))
+ (checksums directory-entry-checksums "checksums"
+ (match-lambda
+ (#f #f)
+ (lst (json->checksums lst))))
+ (id directory-entry-id "dir_id")
+ (length directory-entry-length)
+ (permissions directory-entry-permissions "perms")
+ (target-url directory-entry-target-url "target_url"))
+
+;; <https://archive.softwareheritage.org/api/1/origin/save/>
+(define-json-mapping <save-reply> make-save-reply save-reply?
+ json->save-reply
+ (origin-url save-reply-origin-url "origin_url")
+ (origin-type save-reply-origin-type "origin_type")
+ (request-date save-reply-request-date "save_request_date"
+ string->date*)
+ (request-status save-reply-request-status "save_request_status"
+ string->symbol)
+ (task-status save-reply-task-status "save_task_status"
+ (match-lambda
+ ("not created" 'not-created)
+ ((? string? str) (string->symbol str)))))
+
+;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
+(define-json-mapping <vault-reply> make-vault-reply vault-reply?
+ json->vault-reply
+ (id vault-reply-id)
+ (fetch-url vault-reply-fetch-url "fetch_url")
+ (object-id vault-reply-object-id "obj_id")
+ (object-type vault-reply-object-type "obj_type" string->symbol)
+ (progress-message vault-reply-progress-message "progress_message")
+ (status vault-reply-status "status" string->symbol))
+
+
+;;;
+;;; RPCs.
+;;;
+
+(define-query (lookup-origin url)
+ "Return an origin for URL."
+ (path "/api/1/origin/git/url" url)
+ json->origin)
+
+(define-query (lookup-content hash type)
+ "Return a content for HASH, of the given TYPE--e.g., \"sha256\"."
+ (path "/api/1/content"
+ (string-append type ":"
+ (bytevector->base16-string hash)))
+ json->content)
+
+(define-query (lookup-revision id)
+ "Return the revision with the given ID, typically a Git commit SHA1."
+ (path "/api/1/revision" id)
+ json->revision)
+
+(define-query (lookup-directory id)
+ "Return the directory with the given ID."
+ (path "/api/1/directory" id)
+ json->directory-entries)
+
+(define (json->directory-entries port)
+ (map json->directory-entry (json->scm port)))
+
+(define (origin-visits origin)
+ "Return the list of visits of ORIGIN, a record as returned by
+'lookup-origin'."
+ (call (swh-url (origin-visits-url origin))
+ (lambda (port)
+ (map json->visit (json->scm port)))))
+
+(define (visit-snapshot visit)
+ "Return the snapshot corresponding to VISIT."
+ (call (swh-url (visit-snapshot-url visit))
+ json->snapshot))
+
+(define (branch-target branch)
+ "Return the target of BRANCH, either a <revision> or a <release>."
+ (match (branch-target-type branch)
+ ('release
+ (call (swh-url (branch-target-url branch))
+ json->release))
+ ('revision
+ (call (swh-url (branch-target-url branch))
+ json->revision))))
+
+(define (lookup-origin-revision url tag)
+ "Return a <revision> corresponding to the given TAG for the repository
+coming from URL. Example:
+
+ (lookup-origin-release \"https://github.com/guix-mirror/guix/\" \"v0.8\")
+ => #<<revision> id: \"44941…\" …>
+
+The information is based on the latest visit of URL available. Return #f if
+URL could not be found."
+ (match (lookup-origin url)
+ (#f #f)
+ (origin
+ (match (origin-visits origin)
+ ((visit . _)
+ (let ((snapshot (visit-snapshot visit)))
+ (match (and=> (find (lambda (branch)
+ (string=? (string-append "refs/tags/" tag)
+ (branch-name branch)))
+ (snapshot-branches snapshot))
+ branch-target)
+ ((? release? release)
+ (release-target release))
+ ((? revision? revision)
+ revision)
+ (#f ;tag not found
+ #f))))
+ (()
+ #f)))))
+
+(define (release-target release)
+ "Return the revision that is the target of RELEASE."
+ (match (release-target-type release)
+ ('revision
+ (call (swh-url (release-target-url release))
+ json->revision))))
+
+(define (directory-entry-target entry)
+ "If ENTRY, a directory entry, has type 'directory, return its list of
+directory entries; if it has type 'file, return its <content> object."
+ (call (swh-url (directory-entry-target-url entry))
+ (match (directory-entry-type entry)
+ ('file json->content)
+ ('directory json->directory-entries))))
+
+(define* (save-origin url #:optional (type "git"))
+ "Request URL to be saved."
+ (call (swh-url "/api/1/origin/save" type "url" url) json->save-reply
+ http-post))
+
+(define-query (save-origin-status url type)
+ "Return the status of a /save request for URL and TYPE (e.g., \"git\")."
+ (path "/api/1/origin/save" type "url" url)
+ json->save-reply)
+
+(define-query (query-vault id kind)
+ "Ask the availability of object ID and KIND to the vault, where KIND is
+'directory or 'revision. Return #f if it could not be found, or a
+<vault-reply> on success."
+ ;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
+ ;; There's a single format supported for directories and revisions and for
+ ;; now, the "/format" bit of the URL *must* be omitted.
+ (path "/api/1/vault" (symbol->string kind) id)
+ json->vault-reply)
+
+(define (request-cooking id kind)
+ "Request the cooking of object ID and KIND (one of 'directory or 'revision)
+to the vault. Return a <vault-reply>."
+ (call (swh-url "/api/1/vault" (symbol->string kind) id)
+ json->vault-reply
+ http-post))
+
+(define* (vault-fetch id kind
+ #:key (log-port (current-error-port)))
+ "Return an input port from which a bundle of the object with the given ID
+and KIND (one of 'directory or 'revision) can be retrieved, or #f if the
+object could not be found.
+
+For a directory, the returned stream is a gzip-compressed tarball. For a
+revision, it is a gzip-compressed stream for 'git fast-import'."
+ (let loop ((reply (query-vault id kind)))
+ (match reply
+ (#f
+ (and=> (request-cooking id kind) loop))
+ (_
+ (match (vault-reply-status reply)
+ ('done
+ ;; Fetch the bundle.
+ (let-values (((response port)
+ (http-get (swh-url (vault-reply-fetch-url reply))
+ #:streaming? #t)))
+ (if (= (response-code response) 200)
+ port
+ (begin ;shouldn't happen
+ (close-port port)
+ #f))))
+ ('failed
+ ;; Upon failure, we're supposed to try again.
+ (format log-port "SWH vault: failure: ~a~%"
+ (vault-reply-progress-message reply))
+ (format log-port "SWH vault: retrying...~%")
+ (loop (request-cooking id kind)))
+ ((and (or 'new 'pending) status)
+ ;; Wait until the bundle shows up.
+ (let ((message (vault-reply-progress-message reply)))
+ (when (eq? 'new status)
+ (format log-port "SWH vault: \
+requested bundle cooking, waiting for completion...~%"))
+ (when (string? message)
+ (format log-port "SWH vault: ~a~%" message))
+
+ ;; Wait long enough so we don't exhaust our maximum number of
+ ;; requests per hour too fast (as of this writing, the limit is 60
+ ;; requests per hour per IP address.)
+ (sleep (if (eq? status 'new) 60 30))
+
+ (loop (query-vault id kind)))))))))
+
+
+;;;
+;;; High-level interface.
+;;;
+
+(define (commit-id? reference)
+ "Return true if REFERENCE is likely a commit ID, false otherwise---e.g., if
+it is a tag name."
+ (and (= (string-length reference) 40)
+ (string-every char-set:hex-digit reference)))
+
+(define (call-with-temporary-directory proc) ;FIXME: factorize
+ "Call PROC with a name of a temporary directory; close the directory and
+delete it when leaving the dynamic extent of this call."
+ (let* ((directory (or (getenv "TMPDIR") "/tmp"))
+ (template (string-append directory "/guix-directory.XXXXXX"))
+ (tmp-dir (mkdtemp! template)))
+ (dynamic-wind
+ (const #t)
+ (lambda ()
+ (proc tmp-dir))
+ (lambda ()
+ (false-if-exception (delete-file-recursively tmp-dir))))))
+
+(define (swh-download url reference output)
+ "Download from Software Heritage a checkout of the Git tag or commit
+REFERENCE originating from URL, and unpack it in OUTPUT. Return #t on success
+and #f on failure.
+
+This procedure uses the \"vault\", which contains \"cooked\" directories in
+the form of tarballs. If the requested directory is not cooked yet, it will
+wait until it becomes available, which could take several minutes."
+ (match (if (commit-id? reference)
+ (lookup-revision reference)
+ (lookup-origin-revision url reference))
+ ((? revision? revision)
+ (call-with-temporary-directory
+ (lambda (directory)
+ (let ((input (vault-fetch (revision-directory revision) 'directory))
+ (tar (open-pipe* OPEN_WRITE "tar" "-C" directory "-xzvf" "-")))
+ (dump-port input tar)
+ (close-port input)
+ (let ((status (close-pipe tar)))
+ (unless (zero? status)
+ (error "tar extraction failure" status)))
+
+ (match (scandir directory)
+ (("." ".." sub-directory)
+ (copy-recursively (string-append directory "/" sub-directory)
+ output
+ #:log (%make-void-port "w"))
+ #t))))))
+ (#f
+ #f)))