diff options
author | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2022-08-11 17:53:21 -0400 |
---|---|---|
committer | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2022-08-12 00:38:46 -0400 |
commit | f7c027617da44ed684bbb9b35c603b790365fdbf (patch) | |
tree | 51b85b1e449a02c2c13e4f0241cbe82f7c710b43 /gnu/packages/ocr.scm | |
parent | 887dbf4d802d1061e08eb47026f1adab2c43d791 (diff) | |
download | guix-f7c027617da44ed684bbb9b35c603b790365fdbf.tar guix-f7c027617da44ed684bbb9b35c603b790365fdbf.tar.gz |
gnu: tesseract-ocr: Update to 5.2.0.
* gnu/packages/ocr.scm (tesseract-ocr): Update to 5.2.0.
[inputs, native-inputs]: Move after arguments. Use new style inputs.
[arguments]: Use gexps.
[configure-flags]: Add --disable-static.
[phases]{fix-docbook}: Replace phase with...
{do-not-override-xml-catalog-files}: ... this new phase.
{build-training}: Move after build phase. Enable parallel build.
{install-training}: Move after install phase.
[native-inputs]: Add libxml2.
Diffstat (limited to 'gnu/packages/ocr.scm')
-rw-r--r-- | gnu/packages/ocr.scm | 124 |
1 file changed, 63 insertions, 61 deletions
diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm index 11595e87e5..e28bd17668 100644 --- a/gnu/packages/ocr.scm +++ b/gnu/packages/ocr.scm @@ -5,6 +5,7 @@ ;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com> ;;; Copyright © 2021 Andy Tai <atai@atai.org> ;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr> +;;; Copyright © 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; @@ -74,71 +75,72 @@ it produces text in 8-bit or UTF-8 formats.") (license license:gpl3+))) (define-public tesseract-ocr - ;; There are useful commits beyond the last official stable release. - (let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df") - (revision "1")) - (package - (name "tesseract-ocr") - (version (git-version "4.1.1" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/tesseract-ocr/tesseract") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98")))) - (build-system gnu-build-system) - (inputs - `(("cairo" ,cairo) - ("icu" ,icu4c) - ("leptonica" ,leptonica) - ("pango" ,pango) - ("python-wrapper" ,python-wrapper))) - (native-inputs - `(("asciidoc" ,asciidoc) - ("autoconf" ,autoconf) - ("automake" ,automake) - ("docbook-xsl" ,docbook-xsl) - ("libarchive" ,libarchive) - ("libcurl" ,curl) - ("libtool" ,libtool) - ("libtiff" ,libtiff) - ("pkg-config" ,pkg-config) - ("xsltproc" ,libxslt))) - (arguments - `(#:configure-flags - (let ((leptonica (assoc-ref %build-inputs "leptonica"))) - (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include"))) - #:tests? #f ; Tests currently result in a segfault - #:phases - (modify-phases %standard-phases - (add-after 'unpack 'fix-docbook - (lambda* (#:key inputs #:allow-other-keys) - ;; Don't attempt to download XSL schema. 
- (substitute* "doc/Makefile.am" - (("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl") - (string-append (assoc-ref inputs "docbook-xsl") - "/xml/xsl/docbook-xsl-" - ,(package-version docbook-xsl) - "/manpages/docbook.xsl"))))) - (add-after 'install 'build-training - (lambda _ - (invoke "make" "training"))) - (add-after 'build-training 'install-training - (lambda _ - (invoke "make" "training-install")))))) - (home-page "https://github.com/tesseract-ocr/tesseract") - (synopsis "Optical character recognition engine") - (description - "Tesseract is an optical character recognition (OCR) engine with very + (package + (name "tesseract-ocr") + (version "5.2.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/tesseract-ocr/tesseract") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0dai539h07lqj8lyhznd3wbwdpqr78qrsczq78rsmsryqvmdbyaa")))) + (build-system gnu-build-system) + (arguments + (list + #:configure-flags + #~(list (string-append "LIBLEPT_HEADERSDIR=" + #$(this-package-input "leptonica") "/include") + "--disable-static") ;avoid 6 MiB static archive + ;; The unit tests are disabled because they require building bundled + ;; third party libraries. + #:tests? #f + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'do-not-override-xml-catalog-files + (lambda _ + (substitute* "configure.ac" + (("AC_SUBST\\(\\[XML_CATALOG_FILES])") + "")))) + (add-after 'build 'build-training + (lambda* (#:key parallel-build? #:allow-other-keys) + (define n (if parallel-build? 
(number->string + (parallel-job-count)) + "1")) + (invoke "make" "-j" n "training"))) + (add-after 'install 'install-training + (lambda _ + (invoke "make" "training-install")))))) + (native-inputs + (list asciidoc + autoconf + automake + curl + docbook-xsl + libarchive + libtiff + libtool + libxml2 ;for XML_CATALOG_FILES + libxslt + pkg-config)) + (inputs + (list cairo + icu4c + leptonica + pango + python-wrapper)) + (home-page "https://github.com/tesseract-ocr/tesseract") + (synopsis "Optical character recognition engine") + (description + "Tesseract is an optical character recognition (OCR) engine with very high accuracy. It supports many languages, output text formatting, hOCR positional information and page layout analysis. Several image formats are supported through the Leptonica library. It can also detect whether text is monospaced or proportional.") - (license license:asl2.0)))) + (license license:asl2.0))) (define-public gimagereader (package |