;;; Guix Data Service -- Information about Guix over time ;;; Copyright © 2019 Christopher Baines ;;; ;;; This program is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU Affero General Public License ;;; as published by the Free Software Foundation, either version 3 of ;;; the License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public ;;; License along with this program. If not, see ;;; . (define-module (guix-data-service model package-metadata) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (ice-9 vlist) #:use-module (ice-9 match) #:use-module (squee) #:use-module (json) #:use-module (gcrypt hash) #:use-module (rnrs bytevectors) #:use-module (guix base16) #:use-module (guix packages) #:use-module (guix i18n) #:use-module (guix inferior) #:use-module (guix-data-service utils) #:use-module (guix-data-service model location) #:use-module (guix-data-service model utils) #:export (select-package-metadata-by-revision-name-and-version inferior-packages->package-metadata-ids inferior-packages->translated-package-descriptions-and-synopsis insert-package-metadata-tsvector-entries package-description-and-synopsis-locale-options-guix-revision synopsis-counts-by-locale description-counts-by-locale)) (define locales '("cs_CZ.UTF-8" "da_DK.UTF-8" "de_DE.UTF-8" "eo_EO.UTF-8" "es_ES.UTF-8" "fr_FR.UTF-8" "hu_HU.UTF-8" "nl_NL.UTF-8" "pl_PL.UTF-8" "pt_BR.UTF-8" ;;"sr_SR.UTF-8" "sv_SE.UTF-8" "vi_VN.UTF-8" "zh_CN.UTF-8")) (define inferior-package-id (@@ (guix inferior) inferior-package-id)) (define (select-package-metadata package-metadata-values) (define fields '("synopsis" "description" "home_page" "location_id" "license_set_id")) (string-append "SELECT id, " (string-join (map (lambda (name) (string-append "package_metadata." name)) fields) ", ") " " "FROM package_metadata " "JOIN (VALUES " (string-join (map (match-lambda ((synopsis description home-page location-id license-set-id) (apply simple-format #f (string-append "(" (string-join (list-tabulate (length fields) (lambda (n) "~A")) ",") ")") (list (value->quoted-string-or-null synopsis) (value->quoted-string-or-null description) (value->quoted-string-or-null home-page) location-id license-set-id)))) package-metadata-values) ",") ") AS vals (" (string-join fields ", ") ") " "ON " (string-join (map (lambda (field) (if (member field '("home_page" "location_id" "license_set_id")) (string-append "(package_metadata." field " = vals." field " OR " "(package_metadata." field " IS NULL AND" " vals." field " IS NULL))") (string-append "package_metadata." field " = vals." field))) fields) " AND "))) (define* (select-package-metadata-by-revision-name-and-version conn revision-commit-hash name version locale #:key replacement?) (define query (string-append " SELECT translated_package_synopsis.synopsis, translated_package_synopsis.locale, translated_package_descriptions.description, translated_package_descriptions.locale, package_metadata.home_page, locations.file, locations.line, locations.column_number, (SELECT JSON_AGG((license_data.*)) FROM ( SELECT licenses.name, licenses.uri, licenses.comment FROM licenses INNER JOIN license_sets ON licenses.id = ANY(license_sets.license_ids) WHERE license_sets.id = package_metadata.license_set_id ORDER BY licenses.name ) AS license_data ) AS licenses FROM package_metadata INNER JOIN packages ON package_metadata.id = packages.package_metadata_id LEFT OUTER JOIN locations ON package_metadata.location_id = locations.id INNER JOIN ( SELECT DISTINCT ON (package_description_sets.id) package_description_sets.id, package_descriptions.description, package_descriptions.locale FROM package_descriptions INNER JOIN package_description_sets ON package_descriptions.id = ANY (package_description_sets.description_ids) INNER JOIN package_metadata ON package_metadata.package_description_set_id = package_description_sets.id INNER JOIN packages ON packages.package_metadata_id = package_metadata.id AND packages.name = $2 AND packages.version = $3 ORDER BY package_description_sets.id, CASE WHEN package_descriptions.locale = $4 THEN 2 WHEN package_descriptions.locale = 'en_US.UTF-8' THEN 1 ELSE 0 END DESC ) AS translated_package_descriptions ON package_metadata.package_description_set_id = translated_package_descriptions.id INNER JOIN ( SELECT DISTINCT ON (package_synopsis_sets.id) package_synopsis_sets.id, package_synopsis.synopsis, package_synopsis.locale FROM package_synopsis INNER JOIN package_synopsis_sets ON package_synopsis.id = ANY (package_synopsis_sets.synopsis_ids) INNER JOIN package_metadata ON package_metadata.package_synopsis_set_id = package_synopsis_sets.id INNER JOIN packages ON packages.package_metadata_id = package_metadata.id AND packages.name = $2 AND packages.version = $3 ORDER BY package_synopsis_sets.id, CASE WHEN package_synopsis.locale = $4 THEN 2 WHEN package_synopsis.locale = 'en_US.UTF-8' THEN 1 ELSE 0 END DESC ) AS translated_package_synopsis ON package_metadata.package_synopsis_set_id = translated_package_synopsis.id WHERE packages.id IN ( SELECT package_derivations.package_id FROM package_derivations INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id INNER JOIN guix_revisions ON guix_revision_package_derivations.revision_id = guix_revisions.id WHERE guix_revisions.commit = $1 ) AND packages.name = $2 AND packages.version = $3" (if replacement? " AND packages.replacement_package_id IS NOT NULL" " AND packages.replacement_package_id IS NULL"))) (map (match-lambda ((synopsis synopsis-locale description description-locale home-page file line column-number license-json) (list synopsis synopsis-locale description description-locale home-page file line column-number (if (string-null? license-json) #() (json-string->scm license-json))))) (exec-query conn query (list revision-commit-hash name version locale)))) (define (insert-package-metadata metadata-rows) (string-append "INSERT INTO package_metadata " "(synopsis, description, home_page, location_id, license_set_id) " "VALUES " (string-join (map (match-lambda ((synopsis description home_page location-id license-set-id) (string-append "(" (value->quoted-string-or-null synopsis) "," (value->quoted-string-or-null description) "," (value->quoted-string-or-null home_page) "," location-id "," license-set-id ")"))) metadata-rows) ",") " RETURNING id" ";")) (define (inferior-packages->translated-package-descriptions-and-synopsis inferior inferior-package) (define (translate inferior-package-id) `(let* ((package (hashv-ref %package-table ,inferior-package-id)) (source-locale "en_US.UTF-8") (source-synopsis (begin (setlocale LC_MESSAGES source-locale) (P_ (package-synopsis package)))) (source-description (begin (setlocale LC_MESSAGES source-locale) (and=> (package-description package) P_))) (synopsis-by-locale (filter-map (lambda (locale) (catch 'system-error (lambda () (setlocale LC_MESSAGES locale)) (lambda (key . args) (error (simple-format #f "error changing locale to ~A: ~A ~A" locale key args)))) (let ((synopsis (P_ (package-synopsis package)))) (setlocale LC_MESSAGES source-locale) (if (string=? synopsis source-synopsis) #f (cons locale synopsis)))) (list ,@locales))) (descriptions-by-locale (if (string? source-description) (filter-map (lambda (locale) (catch 'system-error (lambda () (setlocale LC_MESSAGES locale)) (lambda (key . args) (error (simple-format #f "error changing locale to ~A: ~A ~A" locale key args)))) (let ((description (P_ (package-description package)))) (setlocale LC_MESSAGES source-locale) (if (string=? description source-description) #f (cons locale description)))) (list ,@locales)) ;; Could be #f '()))) (cons ;; Pretend that #f descriptions are "" (cons (cons source-locale (or source-description "")) descriptions-by-locale) (cons (cons source-locale source-synopsis) synopsis-by-locale)))) (inferior-eval (translate (inferior-package-id inferior-package)) inferior)) (prevent-inlining-for-tests inferior-packages->translated-package-descriptions-and-synopsis) (define (package-synopsis-data->package-synopsis-ids conn synopsis-by-locale) (insert-missing-data-and-return-all-ids conn "package_synopsis" '(locale synopsis) (map (match-lambda ((locale . synopsis) (list locale synopsis))) synopsis-by-locale) #:delete-duplicates? #t)) (define (insert-package-synopsis-set conn package-synopsis-ids) (let ((query (string-append "INSERT INTO package_synopsis_sets (synopsis_ids) VALUES " (string-append "('{" (string-join (map number->string (sort package-synopsis-ids <)) ", ") "}')") " RETURNING id"))) (match (exec-query conn query) (((id)) id)))) (define (package-synopsis-data->package-synopsis-set-id conn synopsis-by-locale) (let* ((package-synopsis-ids (package-synopsis-data->package-synopsis-ids conn synopsis-by-locale)) (package-synopsis-set-id (exec-query conn (string-append "SELECT id FROM package_synopsis_sets" " WHERE synopsis_ids = ARRAY[" (string-join (map number->string (sort package-synopsis-ids <)) ", ") "]")))) (string->number (match package-synopsis-set-id (((id)) id) (() (insert-package-synopsis-set conn package-synopsis-ids)))))) (define (package-description-data->package-description-ids conn descriptions-by-locale) (insert-missing-data-and-return-all-ids conn "package_descriptions" '(locale description) (map (match-lambda ((locale . description) (list locale description))) descriptions-by-locale) #:delete-duplicates? #t)) (define (insert-package-description-set conn package-description-ids) (let ((query (string-append "INSERT INTO package_description_sets (description_ids) VALUES " (string-append "('{" (string-join (map number->string (sort package-description-ids <)) ", ") "}')") " RETURNING id"))) (match (exec-query conn query) (((id)) id)))) (define (package-description-data->package-description-set-id conn package-description-ids) (let* ((package-description-set-id (exec-query conn (string-append "SELECT id FROM package_description_sets" " WHERE description_ids = ARRAY[" (string-join (map number->string (sort package-description-ids <)) ", ") "]")))) (string->number (match package-description-set-id (((id)) id) (() (insert-package-description-set conn package-description-ids)))))) (define (inferior-packages->package-metadata-ids conn package-metadata license-set-ids) (insert-missing-data-and-return-all-ids conn "package_metadata" '(home_page location_id license_set_id package_description_set_id package_synopsis_set_id) (zip (map (match-lambda ((home-page rest ...) (if (string? home-page) home-page NULL))) package-metadata) (with-time-logging "preparing location ids" (map (match-lambda ((_ location rest ...) (if location (location->location-id conn location) NULL))) package-metadata)) license-set-ids (with-time-logging "preparing package description set ids" (map (lambda (package-description-ids) (package-description-data->package-description-set-id conn package-description-ids)) (with-time-logging "preparing package description ids" (map (match-lambda ((_ _ package-description-data _) (package-description-data->package-description-ids conn package-description-data))) package-metadata)))) (with-time-logging "preparing package synopsis set ids" (map (match-lambda ((_ _ _ package-synopsis-data) (package-synopsis-data->package-synopsis-set-id conn package-synopsis-data))) package-metadata))) ;; There can be duplicated entires in package-metadata, for example where ;; you have one package definition which interits from another, and just ;; overrides the version and the source, the package_metadata entries for ;; both definitions will be the same. #:delete-duplicates? #t ;; There is so much package metadata that it's worth creating a temporary ;; table #:use-temporary-table? #t)) (define (package-description-and-synopsis-locale-options-guix-revision conn revision-id) (exec-query conn " WITH package_metadata_ids AS ( SELECT packages.package_metadata_id AS id FROM packages WHERE packages.id IN ( SELECT package_derivations.package_id FROM package_derivations INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id WHERE guix_revision_package_derivations.revision_id = $1 ) ), synopsis_locales AS ( SELECT package_synopsis.locale FROM package_metadata INNER JOIN package_synopsis_sets ON package_synopsis_sets.id = package_metadata.package_synopsis_set_id INNER JOIN package_synopsis ON package_synopsis.id = ANY (package_synopsis_sets.synopsis_ids) WHERE package_metadata.id IN (SELECT id FROM package_metadata_ids) ), description_locales AS ( SELECT package_descriptions.locale FROM package_metadata INNER JOIN package_description_sets ON package_description_sets.id = package_metadata.package_description_set_id INNER JOIN package_descriptions ON package_descriptions.id = ANY (package_description_sets.description_ids) WHERE package_metadata.id IN (SELECT id FROM package_metadata_ids) ) SELECT locale FROM synopsis_locales UNION SELECT locale FROM description_locales" (list revision-id))) (define (synopsis-counts-by-locale conn revision-id) (define synopsis-counts " SELECT package_synopsis.locale, COUNT(package_synopsis.synopsis) AS translated_synopsis FROM package_synopsis_sets INNER JOIN package_synopsis ON package_synopsis.id = ANY (package_synopsis_sets.synopsis_ids) WHERE package_synopsis_sets.id IN ( SELECT package_metadata.package_synopsis_set_id FROM packages INNER JOIN package_derivations ON packages.id = package_derivations.package_id INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id INNER JOIN guix_revisions ON guix_revision_package_derivations.revision_id = guix_revisions.id INNER JOIN package_metadata ON package_metadata.id = packages.package_metadata_id WHERE guix_revisions.id = $1) GROUP BY package_synopsis.locale; ") (map (match-lambda ((locale synopsis-counts) `(,locale . ,(string->number synopsis-counts)))) (exec-query conn synopsis-counts (list revision-id)))) (define (description-counts-by-locale conn revision-id) (define description-counts " SELECT package_descriptions.locale, COUNT(package_descriptions.description) AS translated_description FROM package_description_sets INNER JOIN package_descriptions ON package_descriptions.id = ANY (package_description_sets.description_ids) WHERE package_description_sets.id IN ( SELECT package_metadata.package_description_set_id FROM packages INNER JOIN package_derivations ON packages.id = package_derivations.package_id INNER JOIN guix_revision_package_derivations ON package_derivations.id = guix_revision_package_derivations.package_derivation_id INNER JOIN guix_revisions ON guix_revision_package_derivations.revision_id = guix_revisions.id INNER JOIN package_metadata ON package_metadata.id = packages.package_metadata_id WHERE guix_revisions.id = $1) GROUP BY package_descriptions.locale; ") (map (match-lambda ((locale description-counts) `(,locale . ,(string->number description-counts)))) (exec-query conn description-counts (list revision-id)))) (define (insert-package-metadata-tsvector-entries conn package-metadata-ids) (define query (string-append " INSERT INTO package_metadata_tsvectors (package_metadata_id, locale, synopsis_and_description, package_synopsis_id, package_description_id) SELECT DISTINCT ON (package_metadata.id, locale) package_metadata.id, CASE WHEN translated_package_synopsis.locale != 'en_US.UTF-8' THEN translated_package_synopsis.locale ELSE translated_package_descriptions.locale END AS locale, ( setweight(to_tsvector(translated_package_synopsis.synopsis), 'B') || setweight(to_tsvector(translated_package_descriptions.description), 'C') ), translated_package_synopsis.id, translated_package_descriptions.id FROM package_metadata INNER JOIN ( SELECT package_description_sets.id AS package_description_set_id, package_descriptions.id, package_descriptions.description, package_descriptions.locale FROM package_description_sets INNER JOIN package_descriptions ON package_descriptions.id = ANY (package_description_sets.description_ids) ORDER BY package_description_sets.id, CASE WHEN package_descriptions.locale = 'en_US.UTF-8' THEN 1 ELSE 2 END DESC ) AS translated_package_descriptions ON package_metadata.package_description_set_id = translated_package_descriptions.package_description_set_id INNER JOIN ( SELECT package_synopsis_sets.id AS package_synopsis_set_id, package_synopsis.id, package_synopsis.synopsis, package_synopsis.locale FROM package_synopsis_sets INNER JOIN package_synopsis ON package_synopsis.id = ANY (package_synopsis_sets.synopsis_ids) ORDER BY package_synopsis_sets.id, CASE WHEN package_synopsis.locale = 'en_US.UTF-8' THEN 1 ELSE 2 END DESC ) AS translated_package_synopsis ON package_metadata.package_synopsis_set_id = translated_package_synopsis.package_synopsis_set_id AND (translated_package_descriptions.locale = translated_package_synopsis.locale OR translated_package_descriptions.locale = 'en_US.UTF-8') WHERE package_metadata.id IN (" (string-join (map number->string package-metadata-ids) ", ") ")" " ORDER BY package_metadata.id, locale, CASE WHEN translated_package_synopsis.locale = translated_package_descriptions.locale THEN 1 ELSE 0 END DESC;")) (exec-query conn query))