aboutsummaryrefslogtreecommitdiff
path: root/gnu/packages/textutils.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/textutils.scm')
-rw-r--r--gnu/packages/textutils.scm182
1 files changed, 154 insertions, 28 deletions
diff --git a/gnu/packages/textutils.scm b/gnu/packages/textutils.scm
index 2c520dfbdf..79cf172179 100644
--- a/gnu/packages/textutils.scm
+++ b/gnu/packages/textutils.scm
@@ -10,10 +10,10 @@
;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
;;; Copyright © 2017 Eric Bavier <bavier@member.fsf.org>
;;; Copyright © 2017 Rene Saavedra <rennes@openmailbox.org>
-;;; Copyright © 2017 Hartmut Goebel <h.goebel@crazy-compilers.com>
+;;; Copyright © 2017, 2019 Hartmut Goebel <h.goebel@crazy-compilers.com>
;;; Copyright © 2017 Kei Kebreau <kkebreau@posteo.net>
;;; Copyright © 2017 Alex Vong <alexvong1995@gmail.com>
-;;; Copyright © 2018 Tobias Geerinckx-Rice <me@tobias.gr>
+;;; Copyright © 2018, 2019 Tobias Geerinckx-Rice <me@tobias.gr>
;;; Copyright © 2018 Pierre Neidhardt <mail@ambrevar.xyz>
;;; Copyright © 2018 Meiyo Peng <meiyo.peng@gmail.com>
;;; Copyright © 2019 Yoshinori Arai <kumagusu08@gmail.com>
@@ -45,6 +45,7 @@
#:use-module (guix build-system python)
#:use-module (gnu packages)
#:use-module (gnu packages autotools)
+ #:use-module (gnu packages base)
#:use-module (gnu packages compression)
#:use-module (gnu packages gettext)
#:use-module (gnu packages java)
@@ -88,23 +89,18 @@ to DOS format and vice versa.")
(define-public recode
(package
(name "recode")
- (version "3.7.1")
+ (version "3.7.6")
(source
(origin
(method url-fetch)
(uri (string-append "https://github.com/rrthomas/recode/releases/"
- "download/v" version "/" name "-" version ".tar.gz"))
+ "download/v" version "/recode-" version ".tar.gz"))
(sha256
- (base32
- "0215hfj0rhlh0grg91qfx75pp6z09bpv8211qdxqihniw7y9a4fs"))
- (modules '((guix build utils)))
- (snippet '(begin
- (delete-file "tests/Recode.c")
- #t))))
+ (base32 "0m59sd1ca0zw1aydpc3m8sw03nc885knmccqryg7byzmqs585ia6"))))
(build-system gnu-build-system)
(native-inputs
- `(("python" ,python-2)
- ("python2-cython" ,python2-cython)))
+ `(("python" ,python)
+ ("python-cython" ,python-cython)))
(home-page "https://github.com/rrthomas/recode")
(synopsis "Text encoding converter")
(description "The Recode library converts files between character sets and
@@ -321,6 +317,112 @@ input bits thoroughly but are not suitable for cryptography.")
;; entails."
(license license:public-domain)))
+;; ascii2binary: a pair of command-line converters (ascii2binary and
+;; binary2ascii) between textual and binary representations of numbers.
+;; It is built with the stock GNU build system and needs no custom phases.
+(define-public ascii2binary
+  (package
+    (name "ascii2binary")
+    (version "2.14")
+    (source
+     (origin
+       (method url-fetch)
+       ;; The tarball name embeds VERSION, so a version bump changes the
+       ;; download URL; the hash below has to be refreshed along with it.
+       (uri (string-append "http://billposer.org/Software/Downloads/"
+                           "ascii2binary-" version ".tar.bz2"))
+       (sha256
+        (base32 "0dc9fxcdmppbs9s06jvq61zbk552laxps0xyk098gj41697ihd96"))))
+    (build-system gnu-build-system)
+    ;; gettext is listed as a native input, i.e. a tool run during the build.
+    (native-inputs
+     `(("gettext" ,gettext-minimal)))
+    (home-page "https://billposer.org/Software/a2b.html")
+    (synopsis "Convert between ASCII, hexadecimal and binary representations")
+    ;; The description names both programs up front so that the opening
+    ;; sentence is understandable on its own.
+    (description "The programs @command{ascii2binary} and
+@command{binary2ascii} are useful for generating test data, for inspecting
+binary files, and for interfacing programs that generate textual output to
+programs that require binary input and conversely. They can also be useful
+when it is desired to reformat numbers.
+
+@itemize
+
+@item @command{ascii2binary} reads input consisting of ASCII or hexadecimal
+ representation numbers separated by whitespace and produces as output
+ the binary equivalents. The type and precision of the binary output
+ is selected using command line flags.
+
+@item @command{binary2ascii} reads input consisting of binary numbers
+ and converts them to their ASCII or hexadecimal representation.
+ Command line flags specify the type and size of the binary numbers
+ and provide control over the format of the output.
+ Unsigned integers may be written out in binary, octal, decimal,
+ or hexadecimal.
+
+ Signed integers may be written out only in binary or decimal. Floating
+ point numbers may be written out only in decimal, either in standard or
+ scientific notation. (If you want to examine the binary representation
+ of floating point numbers, just treat the input as a sequence of unsigned
+ characters.)
+
+@end itemize")
+    (license license:gpl3)))
+
+;; uniutils: a collection of command-line tools for inspecting Unicode text
+;; (uniname, unidesc, unihist, ExplicateUTF8, utf8lookup, unireverse).
+;; The utf8lookup script calls ascii2binary, uniname and iconv by bare name,
+;; so a custom phase rewrites those calls to absolute paths.
+(define-public uniutils
+  (package
+    (name "uniutils")
+    (version "2.27")
+    (source
+     (origin
+       (method url-fetch)
+       ;; The tarball name embeds VERSION; refresh the hash when bumping it.
+       (uri (string-append "http://billposer.org/Software/Downloads/"
+                           "uniutils-" version ".tar.bz2"))
+       (sha256
+        (base32 "19w1510w87gx7n4qy3zsb0m467a4rn5scvh4ajajg7jh6x5xri08"))))
+    (build-system gnu-build-system)
+    (arguments
+     '(#:configure-flags '("--disable-dependency-tracking")
+       #:phases
+       (modify-phases %standard-phases
+         ;; After the build, patch the "utf8lookup" file so that every line
+         ;; beginning with one of the three command names invokes that
+         ;; command through an absolute path instead of relying on $PATH.
+         (add-after 'build 'fix-paths
+           (lambda* (#:key outputs inputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out"))
+                   (a2b (assoc-ref inputs "ascii2binary"))
+                   (iconv (assoc-ref inputs "libiconv")))
+               (substitute* "utf8lookup"
+                 ;; External helper taken from the ascii2binary input.
+                 (("^ascii2binary ") (string-append a2b "/bin/ascii2binary "))
+                 ;; uniname belongs to this very package, hence OUT.
+                 (("^uniname ") (string-append out "/bin/uniname "))
+                 ;; iconv is taken from the libiconv input.
+                 (("^iconv ") (string-append iconv "/bin/iconv ")))
+               #t))))))
+    ;; Both inputs are referenced by the fix-paths phase above.
+    (inputs
+     `(("ascii2binary" ,ascii2binary)
+       ("libiconv" ,libiconv)))
+    (home-page "https://billposer.org/Software/unidesc.html")
+    (synopsis "Find out what is in a Unicode file")
+    (description "Uniutils provides tools that are useful when working with
+Unicode files when one doesn't know the writing system, doesn't have the
+necessary font, needs to inspect invisible characters, needs to find out
+whether characters have been combined or in what order they occur, or needs
+statistics on which characters occur.
+
+@itemize
+
+@item @command{uniname} defaults to printing the character offset of each
+character, its byte offset, its hex code value, its encoding, the glyph
+itself, and its name. It may also be used to validate UTF-8 input.
+
+@item @command{unidesc} reports the character ranges to which different
+portions of the text belong. It can also be used to identify Unicode encodings
+(e.g. UTF-16be) flagged by magic numbers.
+
+@item @command{unihist} generates a histogram of the characters in its input.
+
+@item @command{ExplicateUTF8} is intended for debugging or for learning about
+Unicode. It determines and explains the validity of a sequence of bytes as a
+UTF-8 encoding.
+
+@item @command{utf8lookup} provides a handy way to look up Unicode characters
+from the command line.
+
+@item @command{unireverse} reverses each line of UTF-8 input
+character-by-character.
+
+@end itemize")
+    (license license:gpl3)))
+
(define-public libconfig
(package
(name "libconfig")
@@ -368,7 +470,7 @@ as existing hashing techniques, with provably negligible risk of collisions.")
(define-public oniguruma
(package
(name "oniguruma")
- (version "6.9.2")
+ (version "6.9.3")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/kkos/"
@@ -376,7 +478,7 @@ as existing hashing techniques, with provably negligible risk of collisions.")
"/onig-" version ".tar.gz"))
(sha256
(base32
- "0slp4hpw9qxk4xhn7abyw7065sd355xwkyfq72glxczcjsqxsynv"))))
+ "0pvj37r1rd5h5vw99mdk8z4k44gq1ldwrapkamdiicksdfkr4ndb"))))
(build-system gnu-build-system)
(home-page "https://github.com/kkos/oniguruma")
(synopsis "Regular expression library")
@@ -385,20 +487,6 @@ characteristic of this library is that different character encoding for every
regular expression object can be specified.")
(license license:bsd-2)))
-;; PHP < 7.3.0 requires this old version. Remove once no longer needed.
-(define-public oniguruma-5
- (package
- (inherit oniguruma)
- (version "5.9.6")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/kkos/"
- "oniguruma/releases/download/v" version
- "/onig-" version ".tar.gz"))
- (sha256
- (base32
- "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m"))))))
-
(define-public antiword
(package
(name "antiword")
@@ -786,6 +874,44 @@ indentation.
(home-page "http://docx2txt.sourceforge.net")
(license license:gpl3+)))
+;; odt2txt: command-line extractor of plain text from OpenDocument files.
+;; The source is fetched from the upstream Git repository at the tag formed
+;; by prefixing VERSION with "v".
+(define-public odt2txt
+  (package
+    (name "odt2txt")
+    (version "0.5")
+    (home-page "https://github.com/dstosberg/odt2txt/")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/dstosberg/odt2txt/")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0im3kzvhxkjlx57w6h13mc9584c74ma1dyymgvpq2y61av3gc35v"))))
+    (build-system gnu-build-system)
+    (inputs
+     (list (list "zlib" zlib)))
+    (arguments
+     '(#:phases
+       (modify-phases %standard-phases
+         ;; There is no configure script to run.
+         (delete 'configure))
+       ;; The compiler and the installation root are handed to make directly.
+       #:make-flags
+       (let ((out (assoc-ref %outputs "out")))
+         (list "CC=gcc"
+               (string-append "DESTDIR=" out)))
+       ;; No "make check".
+       #:tests? #f))
+    (synopsis "Converter from OpenDocument Text to plain text")
+    (description "odt2txt is a command-line tool which extracts the text out
+of OpenDocument Texts, as produced by OpenOffice.org, KOffice, StarOffice and
+others.
+
+odt2txt can also extract text from some file formats similar to OpenDocument
+Text, such as OpenOffice.org XML (*.sxw), which was used by OpenOffice.org
+version 1.x and older StarOffice versions. To a lesser extent, odt2txt may be
+useful to extract content from OpenDocument spreadsheets (*.ods) and
+OpenDocument presentations (*.odp).")
+    (license license:gpl2)))
+
(define-public opencc
(package
(name "opencc")