Merge branch 'master' into core-updates

author: Marius Bakke <mbakke@fastmail.com> 2019-05-01 23:11:41 +0200
committer: Marius Bakke <mbakke@fastmail.com> 2019-05-01 23:11:41 +0200
commit: 3b458d5462e6bbd852c2dc5c6670d5655abf53f5 (patch)
tree: 4f3ccec0de1c355134369333c17e948e3258d546 /gnu/packages/bioinformatics.scm
parent: 2ca3fdc2db1aef96fbf702a2f26f5e18ce832038 (diff)
parent: 14da3daafc8dd92fdabd3367694c930440fd72cb (diff)
download: patches-3b458d5462e6bbd852c2dc5c6670d5655abf53f5.tar
patches-3b458d5462e6bbd852c2dc5c6670d5655abf53f5.tar.gz
1 files changed, 381 insertions, 255 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 1836939970..8a49e1e350 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -2325,23 +2325,21 @@ files.  The code was previously part of the cutadapt tool.")
 (define-public cutadapt
   (package
     (name "cutadapt")
-    (version "1.18")
+    (version "2.1")
     (source (origin
-              (method git-fetch)
-              (uri (git-reference
-                    (url "https://github.com/marcelm/cutadapt.git")
-                    (commit (string-append "v" version))))
-              (file-name (git-file-name name version))
+              (method url-fetch)
+              (uri (pypi-uri "cutadapt" version))
               (sha256
                (base32
-                "08bbfwyc0kvcd95jf2s95xiv9s3cbsxm39ydl0qck3fw3cviwxpg"))))
+                "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
     (build-system python-build-system)
     (inputs
      `(("python-dnaio" ,python-dnaio)
        ("python-xopen" ,python-xopen)))
     (native-inputs
      `(("python-cython" ,python-cython)
-       ("python-pytest" ,python-pytest)))
+       ("python-pytest" ,python-pytest)
+       ("python-setuptools-scm" ,python-setuptools-scm)))
     (home-page "https://cutadapt.readthedocs.io/en/stable/")
     (synopsis "Remove adapter sequences from nucleotide sequencing reads")
     (description
@@ -6792,14 +6790,14 @@ databases.  Packages produced are intended to be used with AnnotationDbi.")
 (define-public r-rbgl
   (package
     (name "r-rbgl")
-    (version "1.58.1")
+    (version "1.58.2")
     (source
      (origin
        (method url-fetch)
        (uri (bioconductor-uri "RBGL" version))
        (sha256
         (base32
-         "1l5x2icv9di1lr3gqfi0vjnyd9xc3l77yc42ippqd4cadj3d1pzf"))))
+         "0vhnh47pswnp27c0zqcbnnsayfmq3cxcgrs9g860555ldqfl4cyl"))))
     (properties `((upstream-name . "RBGL")))
     (build-system r-build-system)
     (propagated-inputs `(("r-graph" ,r-graph)))
@@ -7014,29 +7012,6 @@ annotation infrastructure.")
      "This package provides a pipeline for the analysis of GRO-seq data.")
     (license license:gpl3+)))
 
-(define-public r-sparql
-  (package
-  (name "r-sparql")
-  (version "1.16")
-  (source (origin
-           (method url-fetch)
-           (uri (cran-uri "SPARQL" version))
-           (sha256
-            (base32
-             "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
-  (properties `((upstream-name . "SPARQL")))
-  (build-system r-build-system)
-  (propagated-inputs
-   `(("r-rcurl" ,r-rcurl)
-     ("r-xml" ,r-xml)))
-  (home-page "https://cran.r-project.org/web/packages/SPARQL")
-  (synopsis "SPARQL client for R")
-  (description "This package provides an interface to use SPARQL to pose
-SELECT or UPDATE queries to an end-point.")
-  ;; The only license indication is found in the DESCRIPTION file,
-  ;; which states GPL-3.  So we cannot assume GPLv3+.
-  (license license:gpl3)))
-
 (define-public vsearch
   (package
     (name "vsearch")
@@ -7243,32 +7218,6 @@ BLAST, KEGG, GenBank, MEDLINE and GO.")
     ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
     (license (list license:ruby license:lgpl2.1+ license:gpl2+ ))))
 
-(define-public r-acsnminer
-  (package
-    (name "r-acsnminer")
-    (version "0.16.8.25")
-    (source (origin
-              (method url-fetch)
-              (uri (cran-uri "ACSNMineR" version))
-              (sha256
-               (base32
-                "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
-    (properties `((upstream-name . "ACSNMineR")))
-    (build-system r-build-system)
-    (propagated-inputs
-      `(("r-ggplot2" ,r-ggplot2)
-        ("r-gridextra" ,r-gridextra)))
-    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
-    (synopsis "Gene enrichment analysis")
-    (description
-     "This package provides tools to compute and represent gene set enrichment
-or depletion from your data based on pre-saved maps from the @dfn{Atlas of
-Cancer Signalling Networks} (ACSN) or user imported maps.  The gene set
-enrichment can be run with hypergeometric test or Fisher exact test, and can
-use multiple corrections.  Visualization of data can be done either by
-barplots or heatmaps.")
-    (license license:gpl2+)))
-
 (define-public r-biocinstaller
   (package
     (name "r-biocinstaller")
@@ -7315,30 +7264,6 @@ categorize packages in a Bioconductor package repository according to keywords,
 also known as views, in a controlled vocabulary.")
     (license license:artistic2.0)))
 
-(define-public r-bookdown
-  (package
-    (name "r-bookdown")
-    (version "0.9")
-    (source (origin
-              (method url-fetch)
-              (uri (cran-uri "bookdown" version))
-              (sha256
-               (base32
-                "0vg1s1w0l9pm95asqb21yf39mfk1nc9rdhmlys9xwr7p7i7rsz32"))))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-htmltools" ,r-htmltools)
-       ("r-knitr" ,r-knitr)
-       ("r-rmarkdown" ,r-rmarkdown)
-       ("r-tinytex" ,r-tinytex)
-       ("r-yaml" ,r-yaml)
-       ("r-xfun" ,r-xfun)))
-    (home-page "https://github.com/rstudio/bookdown")
-    (synopsis "Authoring books and technical documents with R markdown")
-    (description "This package provides output formats and utilities for
-authoring books and technical documents with R Markdown.")
-    (license license:gpl3)))
-
 (define-public r-biocstyle
   (package
     (name "r-biocstyle")
@@ -7412,29 +7337,6 @@ functionality.")
 checks on R packages that are to be submitted to the Bioconductor repository.")
     (license license:artistic2.0)))
 
-(define-public r-optparse
-  (package
-    (name "r-optparse")
-    (version "1.6.1")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (cran-uri "optparse" version))
-       (sha256
-        (base32
-         "04vyb6dhcga30mvghsg1p052jmf69xqxkvh3hzqz7dscyppy76w1"))))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-getopt" ,r-getopt)))
-    (home-page
-     "https://github.com/trevorld/optparse")
-    (synopsis "Command line option parser")
-    (description
-     "This package provides a command line parser inspired by Python's
-@code{optparse} library to be used with Rscript to write shebang scripts
-that accept short and long options.")
-    (license license:gpl2+)))
-
 (define-public r-s4vectors
   (package
     (name "r-s4vectors")
@@ -7462,31 +7364,6 @@ In addition, a few low-level concrete subclasses of general interest (e.g.
 S4Vectors package itself.")
     (license license:artistic2.0)))
 
-(define-public r-seqinr
-  (package
-    (name "r-seqinr")
-    (version "3.4-5")
-    (source
-      (origin
-        (method url-fetch)
-        (uri (cran-uri "seqinr" version))
-        (sha256
-          (base32
-            "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-ade4" ,r-ade4)
-       ("r-segmented" ,r-segmented)))
-    (inputs
-     `(("zlib" ,zlib)))
-    (home-page "http://seqinr.r-forge.r-project.org/")
-    (synopsis "Biological sequences retrieval and analysis")
-    (description
-     "This package provides tools for exploratory data analysis and data
-visualization of biological sequence (DNA and protein) data.  It also includes
-utilities for sequence data management under the ACNUC system.")
-    (license license:gpl2+)))
-
 (define-public r-iranges
   (package
     (name "r-iranges")
@@ -7888,10 +7765,10 @@ biological sequences or sets of sequences.")
     (home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
     (synopsis "Interface to samtools, bcftools, and tabix")
     (description
-     "This package provides an interface to the 'samtools', 'bcftools', and
-'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
-binary variant call (BCF) and compressed indexed tab-delimited (tabix)
-files.")
+     "This package provides an interface to the @code{samtools},
+@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence
+Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed
+tab-delimited (tabix) files.")
     (license license:expat)))
 
 (define-public r-delayedarray
@@ -8039,13 +7916,13 @@ as well as query and modify the browser state, such as the current viewport.")
 (define-public r-genomicfeatures
   (package
     (name "r-genomicfeatures")
-    (version "1.34.6")
+    (version "1.34.8")
     (source (origin
               (method url-fetch)
               (uri (bioconductor-uri "GenomicFeatures" version))
               (sha256
                (base32
-                "1cz7qx324dmsrkzyhm956cfgr08gpily5rpym7hc8zz5kbl6i3ra"))))
+                "1sxp86hdsg32l2c85jgic65gy92d8kxsm01264hrx6yikdhicjax"))))
     (properties
      `((upstream-name . "GenomicFeatures")))
     (build-system r-build-system)
@@ -8413,6 +8290,9 @@ secondary structure and comparative analysis in R.")
          "13fv78sk5g0gqfl3ks3rps3zc1k66a4lzxvgn36r7ix43yxk7hnr"))))
     (properties `((upstream-name . "Rhtslib")))
     (build-system r-build-system)
+    ;; Without this a temporary directory ends up in the Rhtslib.so binary,
+    ;; which makes R abort the build.
+    (arguments '(#:configure-flags '("--no-staged-install")))
     (propagated-inputs
      `(("r-zlibbioc" ,r-zlibbioc)))
     (inputs
@@ -8467,6 +8347,7 @@ paired-end data.")
               (sha256
                (base32
                 "0ss5hcg2m7gjji6dd23zxa5bd5a7knwcnada4qs5q2l4clgk39ad"))))
+    (properties `((upstream-name . "RCAS")))
     (build-system r-build-system)
     (propagated-inputs
      `(("r-annotationdbi" ,r-annotationdbi)
@@ -8587,45 +8468,6 @@ characterization and visualization of a wide range of mutational patterns
 in SNV base substitution data.")
     (license license:expat)))
 
-(define-public r-wgcna
-  (package
-    (name "r-wgcna")
-    (version "1.66")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (cran-uri "WGCNA" version))
-       (sha256
-        (base32
-         "0rhnyhzfn93yp24jz9v6dzrmyizwzdw070a7idm0k33w1cm8sjqv"))))
-    (properties `((upstream-name . "WGCNA")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotationdbi" ,r-annotationdbi)
-       ("r-doparallel" ,r-doparallel)
-       ("r-dynamictreecut" ,r-dynamictreecut)
-       ("r-fastcluster" ,r-fastcluster)
-       ("r-foreach" ,r-foreach)
-       ("r-go-db" ,r-go-db)
-       ("r-hmisc" ,r-hmisc)
-       ("r-impute" ,r-impute)
-       ("r-rcpp" ,r-rcpp)
-       ("r-robust" ,r-robust)
-       ("r-survival" ,r-survival)
-       ("r-matrixstats" ,r-matrixstats)
-       ("r-preprocesscore" ,r-preprocesscore)))
-    (home-page
-     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
-    (synopsis "Weighted correlation network analysis")
-    (description
-     "This package provides functions necessary to perform Weighted
-Correlation Network Analysis on high-dimensional data.  It includes functions
-for rudimentary data cleaning, construction and summarization of correlation
-networks, module identification and functions for relating both variables and
-modules to sample traits.  It also includes a number of utility functions for
-data manipulation and visualization.")
-    (license license:gpl2+)))
-
 (define-public r-chipkernels
   (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
         (revision "1"))
@@ -8693,39 +8535,6 @@ bound and non bound genomic regions to accurately identify transcription
 factors bound at the specific regions.")
     (license license:gpl2+)))
 
-(define-public r-gkmsvm
-  (package
-    (name "r-gkmsvm")
-    (version "0.79.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (cran-uri "gkmSVM" version))
-       (sha256
-        (base32
-         "04dakbgfvfalz4rm4fvvybp506dn5fbj5g86ybfhrc6wywjllsz3"))))
-    (properties `((upstream-name . "gkmSVM")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-biocgenerics" ,r-biocgenerics)
-       ("r-biostrings" ,r-biostrings)
-       ("r-genomeinfodb" ,r-genomeinfodb)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-iranges" ,r-iranges)
-       ("r-kernlab" ,r-kernlab)
-       ("r-rcpp" ,r-rcpp)
-       ("r-rocr" ,r-rocr)
-       ("r-rtracklayer" ,r-rtracklayer)
-       ("r-s4vectors" ,r-s4vectors)
-       ("r-seqinr" ,r-seqinr)))
-    (home-page "https://cran.r-project.org/web/packages/gkmSVM")
-    (synopsis "Gapped-kmer support vector machine")
-    (description
-     "This R package provides tools for training gapped-kmer SVM classifiers
-for DNA and protein sequences.  This package supports several sequence
-kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
-    (license license:gpl2+)))
-
 (define-public r-tximport
   (package
     (name "r-tximport")
@@ -9632,40 +9441,33 @@ and irregular enzymatic cleavages, mass measurement accuracy, etc.")
 (define-public r-seurat
   (package
     (name "r-seurat")
-    (version "2.3.4")
+    (version "3.0.0")
     (source (origin
               (method url-fetch)
               (uri (cran-uri "Seurat" version))
               (sha256
                (base32
-                "0l8bv4i9nzz26mirnva10mq6pimibj24vk7vpvfypgn7xk4942hd"))))
+                "183lm2wk0i3g114jbdf7pb4ssizr48qzqv3cknbsiackr8kvpsvc"))))
     (properties `((upstream-name . "Seurat")))
     (build-system r-build-system)
     (propagated-inputs
      `(("r-ape" ,r-ape)
        ("r-cluster" ,r-cluster)
        ("r-cowplot" ,r-cowplot)
-       ("r-dosnow" ,r-dosnow)
-       ("r-dplyr" ,r-dplyr)
-       ("r-dtw" ,r-dtw)
        ("r-fitdistrplus" ,r-fitdistrplus)
-       ("r-foreach" ,r-foreach)
-       ("r-fpc" ,r-fpc)
+       ("r-future" ,r-future)
+       ("r-future-apply" ,r-future-apply)
        ("r-ggplot2" ,r-ggplot2)
+       ("r-ggrepel" ,r-ggrepel)
        ("r-ggridges" ,r-ggridges)
-       ("r-gplots" ,r-gplots)
-       ("r-hdf5r" ,r-hdf5r)
-       ("r-hmisc" ,r-hmisc)
-       ("r-httr" ,r-httr)
        ("r-ica" ,r-ica)
        ("r-igraph" ,r-igraph)
        ("r-irlba" ,r-irlba)
-       ("r-lars" ,r-lars)
+       ("r-kernsmooth" ,r-kernsmooth)
        ("r-lmtest" ,r-lmtest)
        ("r-mass" ,r-mass)
        ("r-matrix" ,r-matrix)
        ("r-metap" ,r-metap)
-       ("r-mixtools" ,r-mixtools)
        ("r-pbapply" ,r-pbapply)
        ("r-plotly" ,r-plotly)
        ("r-png" ,r-png)
@@ -9674,12 +9476,14 @@ and irregular enzymatic cleavages, mass measurement accuracy, etc.")
        ("r-rcpp" ,r-rcpp)
        ("r-rcppeigen" ,r-rcppeigen)
        ("r-rcppprogress" ,r-rcppprogress)
-       ("r-reshape2" ,r-reshape2)
        ("r-reticulate" ,r-reticulate)
+       ("r-rlang" ,r-rlang)
        ("r-rocr" ,r-rocr)
+       ("r-rsvd" ,r-rsvd)
        ("r-rtsne" ,r-rtsne)
+       ("r-scales" ,r-scales)
+       ("r-sctransform" ,r-sctransform)
        ("r-sdmtools" ,r-sdmtools)
-       ("r-tidyr" ,r-tidyr)
        ("r-tsne" ,r-tsne)))
     (home-page "http://www.satijalab.org/seurat")
     (synopsis "Seurat is an R toolkit for single cell genomics")
@@ -10026,14 +9830,14 @@ originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
 (define-public r-ensembldb
   (package
     (name "r-ensembldb")
-    (version "2.6.7")
+    (version "2.6.8")
     (source
      (origin
        (method url-fetch)
        (uri (bioconductor-uri "ensembldb" version))
        (sha256
         (base32
-         "1wqq0m1fgvgkzq5zr2s9cj2s7qkg9lx3dwwsqixzs5fn52p4dn7f"))))
+         "0gijx2l2y00h6gfj3gfr7rd4vva6qf2vkfdfy5gdmvqlxy84ka38"))))
     (build-system r-build-system)
     (propagated-inputs
      `(("r-annotationdbi" ,r-annotationdbi)
@@ -10152,6 +9956,16 @@ effort and encourages consistency.")
         (base32
          "0wq49qqzkcn8s19xgaxf2s1j1a563d7pbhhvris6fhxfdjsz4934"))))
     (build-system r-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         ;; See https://github.com/tengfei/ggbio/issues/117
+         ;; This fix will be included in the next release.
+         (add-after 'unpack 'fix-typo
+           (lambda _
+             (substitute* "R/GGbio-class.R"
+               (("fechable") "fetchable"))
+             #t)))))
     (propagated-inputs
      `(("r-annotationdbi" ,r-annotationdbi)
        ("r-annotationfilter" ,r-annotationfilter)
@@ -10352,14 +10166,14 @@ by Ernst and Kellis.")
 (define-public r-ldblock
   (package
     (name "r-ldblock")
-    (version "1.12.0")
+    (version "1.12.1")
     (source
      (origin
        (method url-fetch)
        (uri (bioconductor-uri "ldblock" version))
        (sha256
         (base32
-         "0xbf4pmhrk5fnd1iz5wzjvdr75v114bwpznhcig4wiqmxc27sips"))))
+         "01lf74pby7si2g3kgc10qzr6lkcbigqcgqs2j3anc38vzxv0zhwv"))))
     (build-system r-build-system)
     (propagated-inputs
      `(("r-biocgenerics" ,r-biocgenerics)
@@ -10384,14 +10198,14 @@ defining LD blocks.")
 (define-public r-gqtlstats
   (package
     (name "r-gqtlstats")
-    (version "1.14.0")
+    (version "1.14.1")
     (source
      (origin
        (method url-fetch)
        (uri (bioconductor-uri "gQTLstats" version))
        (sha256
         (base32
-         "1sg9kw59dlayj7qxql9pd93d4hmml504sa3kkfpzfh3xri7m5pxf"))))
+         "1rkbnb3h02fdksc4nacqvmq4jgbj9fz4hm7j51yr2ggcgcykwraa"))))
     (properties `((upstream-name . "gQTLstats")))
     (build-system r-build-system)
     (propagated-inputs
@@ -10625,14 +10439,14 @@ block processing.")
 (define-public r-rhdf5lib
   (package
     (name "r-rhdf5lib")
-    (version "1.4.2")
+    (version "1.4.3")
     (source
      (origin
        (method url-fetch)
        (uri (bioconductor-uri "Rhdf5lib" version))
        (sha256
         (base32
-         "06bxd3wz8lrvh2hzvmjpdv4lvzj5lz9353bw5b3zb98cb8w9r2j5"))
+         "0hjhjvg2kss71fkmxlbgnyyy1agwzgq57rxkgkm4riw82x2rvw7q"))
        (modules '((guix build utils)))
        (snippet
         '(begin
@@ -10861,14 +10675,14 @@ memory usage and processing time is minimized.")
 (define-public r-phangorn
   (package
     (name "r-phangorn")
-    (version "2.4.0")
+    (version "2.5.3")
     (source
      (origin
        (method url-fetch)
        (uri (cran-uri "phangorn" version))
        (sha256
         (base32
-         "0xc8k552nxczy19jr0xjjagrzc8x6lafasgk2c099ls8bc1yml1i"))))
+         "1bv86yfk5r015s7ij6v4zz7bagwrw9m13yfs5853drxb19d5h1m3"))))
     (build-system r-build-system)
     (propagated-inputs
      `(("r-ape" ,r-ape)
@@ -11075,23 +10889,31 @@ with narrow binding events such as transcription factor ChIP-seq.")
 (define-public trim-galore
   (package
     (name "trim-galore")
-    (version "0.4.5")
+    (version "0.6.1")
     (source
      (origin
        (method git-fetch)
        (uri (git-reference
              (url "https://github.com/FelixKrueger/TrimGalore.git")
              (commit version)))
-       (file-name (string-append name "-" version "-checkout"))
+       (file-name (git-file-name name version))
        (sha256
         (base32
-         "0x5892l48c816pf00wmnz5vq0zq6170d3xc8zrxncd4jcz7h1p71"))))
+         "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv"))))
     (build-system gnu-build-system)
     (arguments
      `(#:tests? #f                      ; no tests
        #:phases
        (modify-phases %standard-phases
-         (delete 'configure)
+         (replace 'configure
+           (lambda _
+             ;; Trim Galore tries to figure out what version of Python
+             ;; cutadapt is using by looking at the shebang.  Of course that
+             ;; doesn't work, because cutadapt is wrapped in a shell script.
+             (substitute* "trim_galore"
+               (("my \\$python_return.*")
+                "my $python_return = \"Python 3.999\";\n"))
+             #t))
          (delete 'build)
          (add-after 'unpack 'hardcode-tool-references
            (lambda* (#:key inputs #:allow-other-keys)
@@ -11100,14 +10922,18 @@ with narrow binding events such as transcription factor ChIP-seq.")
                 (string-append "$path_to_cutadapt = '"
                                (assoc-ref inputs "cutadapt")
                                "/bin/cutadapt'"))
-               (("\\| gzip")
-                (string-append "| "
+               (("\\$compression_path = \"gzip\"")
+                (string-append "$compression_path = \""
                                (assoc-ref inputs "gzip")
-                               "/bin/gzip"))
+                               "/bin/gzip\""))
                (("\"gunzip")
                 (string-append "\""
                                (assoc-ref inputs "gzip")
-                               "/bin/gunzip")))
+                               "/bin/gunzip"))
+               (("\"pigz")
+                (string-append "\""
+                               (assoc-ref inputs "pigz")
+                               "/bin/pigz")))
              #t))
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
@@ -11119,6 +10945,7 @@ with narrow binding events such as transcription factor ChIP-seq.")
     (inputs
      `(("gzip" ,gzip)
        ("perl" ,perl)
+       ("pigz" ,pigz)
        ("cutadapt" ,cutadapt)))
     (native-inputs
      `(("unzip" ,unzip)))
@@ -12220,6 +12047,23 @@ graphs.  This library makes it easy to work with @file{.loom} files for
 single-cell RNA-seq data.")
     (license license:bsd-3)))
 
+;; pigx-scrnaseq does not work with the latest loompy; pin version 2.0.3 for it.
+(define-public python-loompy-for-pigx-scrnaseq
+  (package (inherit python-loompy)
+    (name "python-loompy")
+    (version "2.0.3")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/linnarsson-lab/loompy.git")
+                    (commit (string-append "v" version))))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "0pjyl532pl8sbv71yci6h0agchn0naw2qjcwj50n6afrsahbsag3"))))
+    ;; There are no tests to run.
+    (arguments '(#:tests? #f))))
+
 ;; We cannot use the latest commit because it requires Java 9.
 (define-public java-forester
   (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
@@ -12833,7 +12677,7 @@ expression report comparing samples in an easily configurable manner.")
 (define-public pigx-chipseq
   (package
     (name "pigx-chipseq")
-    (version "0.0.31")
+    (version "0.0.40")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
@@ -12841,7 +12685,7 @@ expression report comparing samples in an easily configurable manner.")
                                   "/pigx_chipseq-" version ".tar.gz"))
               (sha256
                (base32
-                "0l3vd9xwqzap3mmyj8xwqp84kj7scbq308diqnwg2albphl75xqs"))))
+                "0y9x62cfwzhsp82imnawyamxp58bcb00yjxdy44spylqnjdlsaj8"))))
     (build-system gnu-build-system)
     ;; parts of the tests rely on access to the network
     (arguments '(#:tests? #f))
@@ -12990,7 +12834,7 @@ methylation and segmentation.")
        ("python-pandas" ,python-pandas)
        ("python-magic" ,python-magic)
        ("python-numpy" ,python-numpy)
-       ("python-loompy" ,python-loompy)
+       ("python-loompy" ,python-loompy-for-pigx-scrnaseq)
        ("ghc-pandoc" ,ghc-pandoc)
        ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
        ("samtools" ,samtools)
@@ -13461,31 +13305,57 @@ in RNA-seq data.")
 (define-public python-scanpy
   (package
     (name "python-scanpy")
-    (version "1.2.2")
+    (version "1.4")
+    ;; Fetch from git because the PyPI tarball does not include the tests.
     (source
      (origin
-       (method url-fetch)
-       (uri (pypi-uri "scanpy" version))
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/theislab/scanpy.git")
+             (commit version)))
+       (file-name (git-file-name name version))
        (sha256
         (base32
-         "1ak7bxms5a0yvf65prppq2g38clkv7c7jnjbnfpkh3xxv7q512jz"))))
+         "0zn6x6c0cnm1a20i6isigwb51g3pr9zpjk8r1minjqnxi5yc9pm4"))))
     (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (replace 'check
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; These tests require Internet access.
+             (delete-file-recursively "scanpy/tests/notebooks")
+             (delete-file "scanpy/tests/test_clustering.py")
+
+             ;; TODO: I can't get the plotting tests to work, even with Xvfb.
+             (delete-file "scanpy/tests/test_plotting.py")
+             (delete-file "scanpy/tests/test_preprocessing.py")
+             (delete-file "scanpy/tests/test_read_10x.py")
+
+             (setenv "PYTHONPATH"
+                     (string-append (getcwd) ":"
+                                    (getenv "PYTHONPATH")))
+             (invoke "pytest")
+             #t)))))
     (propagated-inputs
      `(("python-anndata" ,python-anndata)
+       ("python-h5py" ,python-h5py)
        ("python-igraph" ,python-igraph)
-       ("python-numba" ,python-numba)
        ("python-joblib" ,python-joblib)
+       ("python-louvain" ,python-louvain)
+       ("python-matplotlib" ,python-matplotlib)
        ("python-natsort" ,python-natsort)
        ("python-networkx" ,python-networkx)
-       ("python-statsmodels" ,python-statsmodels)
-       ("python-scikit-learn" ,python-scikit-learn)
-       ("python-matplotlib" ,python-matplotlib)
+       ("python-numba" ,python-numba)
        ("python-pandas" ,python-pandas)
+       ("python-scikit-learn" ,python-scikit-learn)
        ("python-scipy" ,python-scipy)
        ("python-seaborn" ,python-seaborn)
-       ("python-h5py" ,python-h5py)
+       ("python-statsmodels" ,python-statsmodels)
        ("python-tables" ,python-tables)))
-    (home-page "http://github.com/theislab/scanpy")
+    (native-inputs
+     `(("python-pytest" ,python-pytest)))
+    (home-page "https://github.com/theislab/scanpy")
     (synopsis "Single-Cell Analysis in Python.")
     (description "Scanpy is a scalable toolkit for analyzing single-cell gene
 expression data.  It includes preprocessing, visualization, clustering,
@@ -13494,6 +13364,38 @@ Python-based implementation efficiently deals with datasets of more than one
 million cells.")
     (license license:bsd-3)))
 
+(define-public python-bbknn
+  (package
+    (name "python-bbknn")
+    (version "1.3.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "bbknn" version))
+       (sha256
+        (base32
+         "1qgdganvj3lyxj84v7alm23b9vqhwpn8z0115qndpnpy90qxynwz"))))
+    (build-system python-build-system)
+    (propagated-inputs
+     `(("python-annoy" ,python-annoy)
+       ("python-cython" ,python-cython)
+       ("python-faiss" ,python-faiss)
+       ("python-numpy" ,python-numpy)
+       ("python-scanpy" ,python-scanpy)))
+    (home-page "https://github.com/Teichlab/bbknn")
+    (synopsis "Batch balanced KNN")
+    (description "BBKNN is a batch effect removal tool that can be directly
+used in the Scanpy workflow.  It serves as an alternative to
+@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
+graph for subsequent use in clustering, pseudotime and UMAP visualisation.  If
+technical artifacts are present in the data, they will make it challenging to
+link corresponding cell types across different batches.  BBKNN actively
+combats this effect by splitting your data into batches and finding a smaller
+number of neighbours for each cell within each of the groups.  This helps
+create connections between analogous cells in different batches without
+altering the counts or PCA space.")
+    (license license:expat)))
+
 (define-public gffcompare
   (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
         (revision "1"))
@@ -14661,3 +14563,227 @@ overlapping paired-ended reads into (longer) consensus sequences.
 Additionally, the AdapterRemoval may be used to recover a consensus adapter
 sequence for paired-ended data, for which this information is not available.")
     (license license:gpl3+)))
+
+(define-public pplacer
+  (let ((commit "807f6f3"))             ; only spliced into the version string in 'fix-makefile
+    (package
+      (name "pplacer")
+      ;; The commit should be updated with each version change.
+      (version "1.1.alpha19")
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/matsen/pplacer.git")
+               (commit (string-append "v" version)))) ; fetched as "v<version>", not via the 'commit' binding
+         (file-name (git-file-name name version))
+         (sha256
+          (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
+      (build-system ocaml-build-system)
+      (arguments
+       `(#:modules ((guix build ocaml-build-system)
+                    (guix build utils)
+                    (ice-9 ftw))        ; provides 'scandir', used in 'replace-bundled-cddlib
+         #:phases
+         (modify-phases %standard-phases
+           (delete 'configure)
+           (add-after 'unpack 'fix-build-with-latest-ocaml
+             (lambda _
+               (substitute* "myocamlbuild.ml"
+                 (("dep \\[\"c_pam\"\\]" m) ; prepend an -unsafe-string compile flag before this rule
+                  (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
+                                 m))
+                 (("let run_and_read" m) ; insert OCaml helper functions ahead of run_and_read
+                  (string-append "
+let split s ch =
+  let x = ref [] in
+  let rec go s =
+    let pos = String.index s ch in
+    x := (String.before s pos)::!x;
+    go (String.after s (pos + 1))
+  in
+  try go s
+  with Not_found -> !x
+let split_nl s = split s '\\n'
+let before_space s =
+  try String.before s (String.index s ' ')
+  with Not_found -> s
+
+" m))
+                 (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m) ; wrap the call with the helpers above
+                  (string-append "List.map before_space (split_nl & " m ")"))
+                 (("    blank_sep_strings &") "") ; matched text is deleted
+                 (("      Lexing.from_string &") "")) ; matched text is deleted
+               #t))
+           (add-after 'unpack 'replace-bundled-cddlib
+             (lambda* (#:key inputs #:allow-other-keys)
+               (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
+                      (local-dir "cddlib_guix"))
+                 (mkdir local-dir)
+                 (with-directory-excursion local-dir
+                   (invoke "tar" "xvf" cddlib-src)) ; unpack the cddlib source into cddlib_guix/
+                 (let ((cddlib-src-folder
+                        (string-append local-dir "/"
+                                       (list-ref (scandir local-dir) 2) ; NOTE(review): assumes "." and ".." sort first
+                                       "/lib-src")))
+                   (for-each make-file-writable (find-files "cdd_src" ".*")) ; done before the files are overwritten
+                   (for-each
+                    (lambda (file)
+                      (copy-file file   ; copy each matching file into cdd_src/ under its base name
+                                 (string-append "cdd_src/" (basename file))))
+                    (find-files cddlib-src-folder ".*[ch]$")))
+                 #t)))
+           (add-after 'unpack 'fix-makefile
+             (lambda _
+               ;; Remove system calls to 'git'.
+               (substitute* "Makefile"
+                 (("^DESCRIPT:=pplacer-.*")
+                  (string-append
+                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
+               (substitute* "myocamlbuild.ml"
+                 (("git describe --tags --long .*\\\" with")
+                  (string-append
+                   "echo -n v" ,version "-" ,commit "\" with"))) ; emits "v<version>-<commit>" instead
+               #t))
+           (replace 'install
+             (lambda* (#:key outputs #:allow-other-keys)
+               (let* ((out (assoc-ref outputs "out"))
+                      (bin (string-append out "/bin")))
+                 (copy-recursively "bin" bin)) ; installation is just copying bin/ to <out>/bin
+               #t)))))
+      (inputs
+       `(("zlib" ,zlib "static")        ; the "static" output of zlib
+         ("gsl" ,gsl)
+         ("ocaml-ounit" ,ocaml-ounit)
+         ("ocaml-batteries" ,ocaml-batteries)
+         ("ocaml-camlzip" ,camlzip)
+         ("ocaml-csv" ,ocaml-csv)
+         ("ocaml-sqlite3" ,ocaml-sqlite3)
+         ("ocaml-xmlm" ,ocaml-xmlm)
+         ("ocaml-mcl" ,ocaml-mcl)
+         ("ocaml-gsl" ,ocaml-gsl-1)))
+      (native-inputs
+       `(("cddlib-src" ,(package-source cddlib)) ; source of cddlib, unpacked by 'replace-bundled-cddlib
+         ("ocamlbuild" ,ocamlbuild)
+         ("pkg-config" ,pkg-config)))
+      (propagated-inputs
+       `(("pplacer-scripts" ,pplacer-scripts))) ; defined further down in this file
+      (synopsis "Phylogenetic placement of biological sequences")
+      (description
+       "Pplacer places query sequences on a fixed reference phylogenetic tree
+to maximize phylogenetic likelihood or posterior probability according to a
+reference alignment.  Pplacer is designed to be fast, to give useful
+information about uncertainty, and to offer advanced visualization and
+downstream analysis.")
+      (home-page "http://matsen.fhcrc.org/pplacer")
+      (license license:gpl3))))
+
+;; This package is installed alongside 'pplacer'.  It is a separate package so
+;; that it can use the python-build-system for the scripts that are
+;; distributed alongside the main OCaml binaries.
+(define pplacer-scripts                 ; not exported; reached via pplacer's propagated-inputs
+  (package
+    (inherit pplacer)                   ; fields not overridden below come from pplacer
+    (name "pplacer-scripts")
+    (build-system python-build-system)
+    (arguments
+     `(#:python ,python-2
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'enter-scripts-dir
+           (lambda _ (chdir "scripts") #t)) ; later phases run inside scripts/
+         (replace 'check
+           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
+         (add-after 'install 'wrap-executables
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin")))
+               (let ((path (string-append
+                            (assoc-ref inputs "hmmer") "/bin:"
+                            (assoc-ref inputs "infernal") "/bin")))
+                 ;; Prefix PATH with the hmmer and infernal bin directories.
+                 (wrap-program (string-append bin "/refpkg_align.py")
+                   `("PATH" ":" prefix (,path))))
+               (let ((path (string-append
+                            (assoc-ref inputs "hmmer") "/bin")))
+                 (wrap-program (string-append bin "/hrefpkg_query.py") ; only hmmer is added here
+                   `("PATH" ":" prefix (,path)))))
+             #t)))))
+    (inputs
+     `(("infernal" ,infernal)
+       ("hmmer" ,hmmer)))
+    (propagated-inputs
+     `(("python-biopython" ,python2-biopython)
+       ("taxtastic" ,taxtastic)))
+    (synopsis "Pplacer Python scripts")))
+
+(define-public python2-checkm-genome    ; CheckM, built with Python 2
+  (package
+    (name "python2-checkm-genome")
+    (version "1.0.13")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "checkm-genome" version))
+       (sha256
+        (base32
+         "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:python ,python-2
+       #:tests? #f)) ; some tests are interactive
+    (propagated-inputs                  ; labels say "python-", values are the python2- variants
+     `(("python-dendropy" ,python2-dendropy)
+       ("python-matplotlib" ,python2-matplotlib)
+       ("python-numpy" ,python2-numpy)
+       ("python-pysam" ,python2-pysam)
+       ("python-scipy" ,python2-scipy)))
+    (home-page "https://pypi.python.org/pypi/checkm/")
+    (synopsis "Assess the quality of putative genome bins")
+    (description
+     "CheckM provides a set of tools for assessing the quality of genomes
+recovered from isolates, single cells, or metagenomes.  It provides robust
+estimates of genome completeness and contamination by using collocated sets of
+genes that are ubiquitous and single-copy within a phylogenetic lineage.
+Assessment of genome quality can also be examined using plots depicting key
+genomic characteristics (e.g., GC, coding density) which highlight sequences
+outside the expected distributions of a typical genome.  CheckM also provides
+tools for identifying genome bins that are likely candidates for merging based
+on marker set compatibility, similarity in genomic characteristics, and
+proximity within a reference genome.")
+    (license license:gpl3+)))
+
+(define-public umi-tools                ; UMI-tools, fetched from PyPI as "umi_tools"
+  (package
+    (name "umi-tools")
+    (version "1.0.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "umi_tools" version))
+       (sha256
+        (base32
+         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
+    (build-system python-build-system)
+    (inputs
+     `(("python-setuptools" ,python-setuptools) ; NOTE(review): also in native-inputs; likely redundant
+       ("python-pandas" ,python-pandas)
+       ("python-future" ,python-future)
+       ("python-scipy" ,python-scipy)
+       ("python-matplotlib" ,python-matplotlib)
+       ("python-regex" ,python-regex)
+       ("python-pysam" ,python-pysam)))
+    (native-inputs
+     `(("python-setuptools" ,python-setuptools)
+       ("python-cython" ,python-cython)))
+    (home-page "https://github.com/CGATOxford/UMI-tools")
+    (synopsis "Tools for analyzing unique molecular identifiers")
+    (description "This package provides tools for dealing with @dfn{Unique
+Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
+genetic sequences.  There are six tools: the @code{extract} and
+@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
+cell barcodes for alignment.  The remaining commands, @code{group},
+@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
+duplicates using the UMIs and perform different levels of analysis depending
+on the needs of the user.")
+    (license license:expat)))
author	Marius Bakke <mbakke@fastmail.com>	2019-05-01 23:11:41 +0200
committer	Marius Bakke <mbakke@fastmail.com>	2019-05-01 23:11:41 +0200
commit	3b458d5462e6bbd852c2dc5c6670d5655abf53f5 (patch)
tree	4f3ccec0de1c355134369333c17e948e3258d546 /gnu/packages/bioinformatics.scm
parent	2ca3fdc2db1aef96fbf702a2f26f5e18ce832038 (diff)
parent	14da3daafc8dd92fdabd3367694c930440fd72cb (diff)
download	patches-3b458d5462e6bbd852c2dc5c6670d5655abf53f5.tar patches-3b458d5462e6bbd852c2dc5c6670d5655abf53f5.tar.gz