about summary refs log tree commit diff
path: root/gnu/packages
diff options
context:
space:
mode:
author Lars-Dominik Braun <ldb@leibniz-psychology.org> 2021-03-15 09:40:05 +0100
committer Lars-Dominik Braun <ldb@leibniz-psychology.org> 2021-03-15 10:53:02 +0100
commit 804fad34e8e0f74483e987cfe5f6a496c1debe74 (patch)
tree bd8570a9444bb43caa24a2b4dacf5a6a3d4c52b1 /gnu/packages
parent 584c868fbd6fe5fea5715e2938fc80ec14446502 (diff)
download guix-804fad34e8e0f74483e987cfe5f6a496c1debe74.tar
guix-804fad34e8e0f74483e987cfe5f6a496c1debe74.tar.gz
gnu: Add r-textclean.
* gnu/packages/cran.scm (r-textclean): New variable.
Diffstat (limited to 'gnu/packages')
-rw-r--r-- gnu/packages/cran.scm 35
1 files changed, 35 insertions, 0 deletions
diff --git a/gnu/packages/cran.scm b/gnu/packages/cran.scm
index b8a57cdb28..7f6003ac01 100644
--- a/gnu/packages/cran.scm
+++ b/gnu/packages/cran.scm
@@ -27510,3 +27510,38 @@ and word lists.")
three, ... Ordinals are also available, first, second, third, ... and
indefinite article choice, \"a\" or \"an\".")
(license license:gpl2)))
+
+;; CRAN package "textclean"; the source tarball is located via `cran-uri'.
+(define-public r-textclean
+  (package
+    (name "r-textclean")
+    (version "0.9.3")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cran-uri "textclean" version))
+       (sha256
+        (base32 "0kgjh6c4f14qkjc4fds7q7rpf4nkma3p0igm54fplmm3p853nvrz"))))
+    (properties `((upstream-name . "textclean")))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-data-table" ,r-data-table)
+       ("r-english" ,r-english)
+       ("r-glue" ,r-glue)
+       ("r-lexicon" ,r-lexicon)
+       ("r-mgsub" ,r-mgsub)
+       ("r-qdapregex" ,r-qdapregex)
+       ("r-stringi" ,r-stringi)
+       ("r-textshape" ,r-textshape)))
+    (home-page "https://github.com/trinker/textclean")
+    (synopsis "Text cleaning tools")
+    (description
+     "This package provides tools to clean and process text.  Tools are geared
+at checking for substrings that are not optimal for analysis and replacing or
+removing them (normalizing) with more analysis friendly substrings (see
+Sproat, Black, Chen, Kumar, Ostendorf, & Richards (2001)
+@url{doi:10.1006/csla.2001.0169}) or extracting them into new variables.  For
+example, emoticons are often used in text but not always easily handled by
+analysis algorithms.  The @code{replace_emoticon()} function replaces
+emoticons with word equivalents.")
+    (license license:gpl2)))