From 8067897fb940daa4db64decea76b1679610c3422 Mon Sep 17 00:00:00 2001
From: Vinicius Monego
Date: Sat, 20 Feb 2021 04:05:03 -0300
Subject: gnu: Add python-pdfminer-six.

* gnu/packages/python-xyz.scm (python-pdfminer-six): New variable.

Signed-off-by: Nicolas Goaziou
---
 gnu/packages/python-xyz.scm | 48 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/gnu/packages/python-xyz.scm b/gnu/packages/python-xyz.scm
index 2ca4625c2c..b26fce72a9 100644
--- a/gnu/packages/python-xyz.scm
+++ b/gnu/packages/python-xyz.scm
@@ -12309,6 +12309,54 @@ encoding algorithms to do fuzzy string matching.")
 module, adding support for Unicode strings.")
     (license license:bsd-2)))
 
+(define-public python-pdfminer-six
+  (package
+    (name "python-pdfminer-six")
+    (version "20201018")
+    ;; There are no tests in the PyPI tarball.
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/pdfminer/pdfminer.six")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "1a2fxxnnjqbx344znpvx7cnv1881dk6585ibw01inhfq3w6yj2lr"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         ;; Tests write to the source tree.
+         (add-after 'unpack 'make-git-checkout-writable
+           (lambda _
+             (for-each make-file-writable (find-files "."))
+             #t))
+         (replace 'check
+           (lambda _
+             (invoke "make" "test")))
+         (add-before 'reset-gzip-timestamps 'make-files-writable
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (for-each make-file-writable
+                         (find-files out "\\.gz$"))
+               #t))))))
+    (propagated-inputs
+     `(("python-chardet" ,python-chardet)
+       ("python-cryptography" ,python-cryptography)
+       ("python-sortedcontainers" ,python-sortedcontainers)))
+    (native-inputs
+     `(("python-nose" ,python-nose)
+       ("python-tox" ,python-tox)))
+    (home-page "https://github.com/pdfminer/pdfminer.six")
+    (synopsis "PDF parser and analyzer")
+    (description "@code{pdfminer.six} is a community maintained fork of
+the original PDFMiner. It is a tool for extracting information from PDF
+documents. It focuses on getting and analyzing text data. Pdfminer.six
+extracts the text from a page directly from the sourcecode of the PDF. It
+can also be used to get the exact location, font or color of the text.")
+    (license license:expat)))
+
 (define-public python-rarfile
   (package
     (name "python-rarfile")
-- 
cgit v1.2.3