aboutsummaryrefslogtreecommitdiff
path: root/gnu/packages/ocr.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/ocr.scm')
-rw-r--r--gnu/packages/ocr.scm93
1 files changed, 93 insertions, 0 deletions
diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm
new file mode 100644
index 0000000000..32da42b95f
--- /dev/null
+++ b/gnu/packages/ocr.scm
@@ -0,0 +1,93 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2013 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (gnu packages ocr)
+ #:use-module ((guix licenses) #:prefix license:)
+ #:use-module (guix packages)
+ #:use-module (guix download)
+ #:use-module (guix build-system gnu)
+ #:use-module (gnu packages autotools)
+ #:use-module (gnu packages compression)
+ #:use-module (gnu packages image)
+ #:use-module (gnu packages pkg-config))
+
+(define-public ocrad
+ (package
+ (name "ocrad")
+ (version "0.24")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "mirror://gnu/ocrad/ocrad-"
+ version ".tar.lz"))
+ (sha256
+ (base32
+ "0hhlx072d00bi9qia0nj5izsq4qkscpfz2mpbyfc72msl3hfvslv"))))
+ (build-system gnu-build-system)
+ (native-inputs `(("lzip" ,lzip)))
+ (home-page "http://www.gnu.org/software/ocrad/")
+ (synopsis "Optical character recognition based on feature extraction")
+ (description
+ "GNU Ocrad is an optical character recognition program based on a
+feature extraction method. It can read images in PBM, PGM or PPM formats and
+it produces text in 8-bit or UTF-8 formats.")
+ (license license:gpl3+)))
+
+(define-public tesseract-ocr
+ (package
+ (name "tesseract-ocr")
+ (version "3.02.02")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://tesseract-ocr.googlecode.com/files/tesseract-ocr-"
+ version ".tar.gz"))
+ (sha256
+ (base32 "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96"))
+ (modules '((guix build utils)))
+ ;; Leptonica added a pkg-config file in the meanwhile.
+ (snippet
+ '(substitute* "tesseract.pc.in"
+ (("^# Requires: lept ## .*")
+ "Requires: lept\n")))))
+ (build-system gnu-build-system)
+ (native-inputs
+ `(("autoconf" ,autoconf)
+ ("automake" ,automake)
+ ("libtool" ,libtool)))
+ (propagated-inputs
+ `(("leptonica" ,leptonica)))
+ (arguments
+ '(#:phases
+ (modify-phases %standard-phases
+ (add-after
+ unpack autogen
+ (lambda _
+ (zero? (system* "sh" "autogen.sh")))))
+ #:configure-flags
+ (let ((leptonica (assoc-ref %build-inputs "leptonica")))
+ (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))))
+ (home-page "https://code.google.com/p/tesseract-ocr/")
+ (synopsis "Optical character recognition engine")
+ (description
+ "Tesseract is an optical character recognition (OCR) engine with very
+high accuracy. It supports many languages, output text formatting, hOCR
+positional information and page layout analysis. Several image formats are
+supported through the Leptonica library. It can also detect whether text is
+monospaced or proportional.")
+ (license license:asl2.0)))