diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 636 |
1 files changed, 381 insertions, 255 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 1836939970..8a49e1e350 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -2325,23 +2325,21 @@ files. The code was previously part of the cutadapt tool.") (define-public cutadapt (package (name "cutadapt") - (version "1.18") + (version "2.1") (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/marcelm/cutadapt.git") - (commit (string-append "v" version)))) - (file-name (git-file-name name version)) + (method url-fetch) + (uri (pypi-uri "cutadapt" version)) (sha256 (base32 - "08bbfwyc0kvcd95jf2s95xiv9s3cbsxm39ydl0qck3fw3cviwxpg")))) + "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89")))) (build-system python-build-system) (inputs `(("python-dnaio" ,python-dnaio) ("python-xopen" ,python-xopen))) (native-inputs `(("python-cython" ,python-cython) - ("python-pytest" ,python-pytest))) + ("python-pytest" ,python-pytest) + ("python-setuptools-scm" ,python-setuptools-scm))) (home-page "https://cutadapt.readthedocs.io/en/stable/") (synopsis "Remove adapter sequences from nucleotide sequencing reads") (description @@ -6792,14 +6790,14 @@ databases. Packages produced are intended to be used with AnnotationDbi.") (define-public r-rbgl (package (name "r-rbgl") - (version "1.58.1") + (version "1.58.2") (source (origin (method url-fetch) (uri (bioconductor-uri "RBGL" version)) (sha256 (base32 - "1l5x2icv9di1lr3gqfi0vjnyd9xc3l77yc42ippqd4cadj3d1pzf")))) + "0vhnh47pswnp27c0zqcbnnsayfmq3cxcgrs9g860555ldqfl4cyl")))) (properties `((upstream-name . "RBGL"))) (build-system r-build-system) (propagated-inputs `(("r-graph" ,r-graph))) @@ -7014,29 +7012,6 @@ annotation infrastructure.") "This package provides a pipeline for the analysis of GRO-seq data.") (license license:gpl3+))) -(define-public r-sparql - (package - (name "r-sparql") - (version "1.16") - (source (origin - (method url-fetch) - (uri (cran-uri "SPARQL" version)) - (sha256 - (base32 - "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc")))) - (properties `((upstream-name . "SPARQL"))) - (build-system r-build-system) - (propagated-inputs - `(("r-rcurl" ,r-rcurl) - ("r-xml" ,r-xml))) - (home-page "https://cran.r-project.org/web/packages/SPARQL") - (synopsis "SPARQL client for R") - (description "This package provides an interface to use SPARQL to pose -SELECT or UPDATE queries to an end-point.") - ;; The only license indication is found in the DESCRIPTION file, - ;; which states GPL-3. So we cannot assume GPLv3+. - (license license:gpl3))) - (define-public vsearch (package (name "vsearch") @@ -7243,32 +7218,6 @@ BLAST, KEGG, GenBank, MEDLINE and GO.") ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+) (license (list license:ruby license:lgpl2.1+ license:gpl2+ )))) -(define-public r-acsnminer - (package - (name "r-acsnminer") - (version "0.16.8.25") - (source (origin - (method url-fetch) - (uri (cran-uri "ACSNMineR" version)) - (sha256 - (base32 - "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l")))) - (properties `((upstream-name . "ACSNMineR"))) - (build-system r-build-system) - (propagated-inputs - `(("r-ggplot2" ,r-ggplot2) - ("r-gridextra" ,r-gridextra))) - (home-page "https://cran.r-project.org/web/packages/ACSNMineR") - (synopsis "Gene enrichment analysis") - (description - "This package provides tools to compute and represent gene set enrichment -or depletion from your data based on pre-saved maps from the @dfn{Atlas of -Cancer Signalling Networks} (ACSN) or user imported maps. The gene set -enrichment can be run with hypergeometric test or Fisher exact test, and can -use multiple corrections. Visualization of data can be done either by -barplots or heatmaps.") - (license license:gpl2+))) - (define-public r-biocinstaller (package (name "r-biocinstaller") @@ -7315,30 +7264,6 @@ categorize packages in a Bioconductor package repository according to keywords, also known as views, in a controlled vocabulary.") (license license:artistic2.0))) -(define-public r-bookdown - (package - (name "r-bookdown") - (version "0.9") - (source (origin - (method url-fetch) - (uri (cran-uri "bookdown" version)) - (sha256 - (base32 - "0vg1s1w0l9pm95asqb21yf39mfk1nc9rdhmlys9xwr7p7i7rsz32")))) - (build-system r-build-system) - (propagated-inputs - `(("r-htmltools" ,r-htmltools) - ("r-knitr" ,r-knitr) - ("r-rmarkdown" ,r-rmarkdown) - ("r-tinytex" ,r-tinytex) - ("r-yaml" ,r-yaml) - ("r-xfun" ,r-xfun))) - (home-page "https://github.com/rstudio/bookdown") - (synopsis "Authoring books and technical documents with R markdown") - (description "This package provides output formats and utilities for -authoring books and technical documents with R Markdown.") - (license license:gpl3))) - (define-public r-biocstyle (package (name "r-biocstyle") @@ -7412,29 +7337,6 @@ functionality.") checks on R packages that are to be submitted to the Bioconductor repository.") (license license:artistic2.0))) -(define-public r-optparse - (package - (name "r-optparse") - (version "1.6.1") - (source - (origin - (method url-fetch) - (uri (cran-uri "optparse" version)) - (sha256 - (base32 - "04vyb6dhcga30mvghsg1p052jmf69xqxkvh3hzqz7dscyppy76w1")))) - (build-system r-build-system) - (propagated-inputs - `(("r-getopt" ,r-getopt))) - (home-page - "https://github.com/trevorld/optparse") - (synopsis "Command line option parser") - (description - "This package provides a command line parser inspired by Python's -@code{optparse} library to be used with Rscript to write shebang scripts -that accept short and long options.") - (license license:gpl2+))) - (define-public r-s4vectors (package (name "r-s4vectors") @@ -7462,31 +7364,6 @@ In addition, a few low-level concrete subclasses of general interest (e.g. S4Vectors package itself.") (license license:artistic2.0))) -(define-public r-seqinr - (package - (name "r-seqinr") - (version "3.4-5") - (source - (origin - (method url-fetch) - (uri (cran-uri "seqinr" version)) - (sha256 - (base32 - "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn")))) - (build-system r-build-system) - (propagated-inputs - `(("r-ade4" ,r-ade4) - ("r-segmented" ,r-segmented))) - (inputs - `(("zlib" ,zlib))) - (home-page "http://seqinr.r-forge.r-project.org/") - (synopsis "Biological sequences retrieval and analysis") - (description - "This package provides tools for exploratory data analysis and data -visualization of biological sequence (DNA and protein) data. It also includes -utilities for sequence data management under the ACNUC system.") - (license license:gpl2+))) - (define-public r-iranges (package (name "r-iranges") @@ -7888,10 +7765,10 @@ biological sequences or sets of sequences.") (home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html") (synopsis "Interface to samtools, bcftools, and tabix") (description - "This package provides an interface to the 'samtools', 'bcftools', and -'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA, -binary variant call (BCF) and compressed indexed tab-delimited (tabix) -files.") + "This package provides an interface to the @code{samtools}, +@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence +Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed +tab-delimited (tabix) files.") (license license:expat))) (define-public r-delayedarray @@ -8039,13 +7916,13 @@ as well as query and modify the browser state, such as the current viewport.") (define-public r-genomicfeatures (package (name "r-genomicfeatures") - (version "1.34.6") + (version "1.34.8") (source (origin (method url-fetch) (uri (bioconductor-uri "GenomicFeatures" version)) (sha256 (base32 - "1cz7qx324dmsrkzyhm956cfgr08gpily5rpym7hc8zz5kbl6i3ra")))) + "1sxp86hdsg32l2c85jgic65gy92d8kxsm01264hrx6yikdhicjax")))) (properties `((upstream-name . "GenomicFeatures"))) (build-system r-build-system) @@ -8413,6 +8290,9 @@ secondary structure and comparative analysis in R.") "13fv78sk5g0gqfl3ks3rps3zc1k66a4lzxvgn36r7ix43yxk7hnr")))) (properties `((upstream-name . "Rhtslib"))) (build-system r-build-system) + ;; Without this a temporary directory ends up in the Rhtslib.so binary, + ;; which makes R abort the build. + (arguments '(#:configure-flags '("--no-staged-install"))) (propagated-inputs `(("r-zlibbioc" ,r-zlibbioc))) (inputs @@ -8467,6 +8347,7 @@ paired-end data.") (sha256 (base32 "0ss5hcg2m7gjji6dd23zxa5bd5a7knwcnada4qs5q2l4clgk39ad")))) + (properties `((upstream-name . "RCAS"))) (build-system r-build-system) (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi) @@ -8587,45 +8468,6 @@ characterization and visualization of a wide range of mutational patterns in SNV base substitution data.") (license license:expat))) -(define-public r-wgcna - (package - (name "r-wgcna") - (version "1.66") - (source - (origin - (method url-fetch) - (uri (cran-uri "WGCNA" version)) - (sha256 - (base32 - "0rhnyhzfn93yp24jz9v6dzrmyizwzdw070a7idm0k33w1cm8sjqv")))) - (properties `((upstream-name . "WGCNA"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-doparallel" ,r-doparallel) - ("r-dynamictreecut" ,r-dynamictreecut) - ("r-fastcluster" ,r-fastcluster) - ("r-foreach" ,r-foreach) - ("r-go-db" ,r-go-db) - ("r-hmisc" ,r-hmisc) - ("r-impute" ,r-impute) - ("r-rcpp" ,r-rcpp) - ("r-robust" ,r-robust) - ("r-survival" ,r-survival) - ("r-matrixstats" ,r-matrixstats) - ("r-preprocesscore" ,r-preprocesscore))) - (home-page - "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/") - (synopsis "Weighted correlation network analysis") - (description - "This package provides functions necessary to perform Weighted -Correlation Network Analysis on high-dimensional data. It includes functions -for rudimentary data cleaning, construction and summarization of correlation -networks, module identification and functions for relating both variables and -modules to sample traits. It also includes a number of utility functions for -data manipulation and visualization.") - (license license:gpl2+))) - (define-public r-chipkernels (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372") (revision "1")) @@ -8693,39 +8535,6 @@ bound and non bound genomic regions to accurately identify transcription factors bound at the specific regions.") (license license:gpl2+))) -(define-public r-gkmsvm - (package - (name "r-gkmsvm") - (version "0.79.0") - (source - (origin - (method url-fetch) - (uri (cran-uri "gkmSVM" version)) - (sha256 - (base32 - "04dakbgfvfalz4rm4fvvybp506dn5fbj5g86ybfhrc6wywjllsz3")))) - (properties `((upstream-name . "gkmSVM"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-biostrings" ,r-biostrings) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-kernlab" ,r-kernlab) - ("r-rcpp" ,r-rcpp) - ("r-rocr" ,r-rocr) - ("r-rtracklayer" ,r-rtracklayer) - ("r-s4vectors" ,r-s4vectors) - ("r-seqinr" ,r-seqinr))) - (home-page "https://cran.r-project.org/web/packages/gkmSVM") - (synopsis "Gapped-kmer support vector machine") - (description - "This R package provides tools for training gapped-kmer SVM classifiers -for DNA and protein sequences. This package supports several sequence -kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.") - (license license:gpl2+))) - (define-public r-tximport (package (name "r-tximport") @@ -9632,40 +9441,33 @@ and irregular enzymatic cleavages, mass measurement accuracy, etc.") (define-public r-seurat (package (name "r-seurat") - (version "2.3.4") + (version "3.0.0") (source (origin (method url-fetch) (uri (cran-uri "Seurat" version)) (sha256 (base32 - "0l8bv4i9nzz26mirnva10mq6pimibj24vk7vpvfypgn7xk4942hd")))) + "183lm2wk0i3g114jbdf7pb4ssizr48qzqv3cknbsiackr8kvpsvc")))) (properties `((upstream-name . "Seurat"))) (build-system r-build-system) (propagated-inputs `(("r-ape" ,r-ape) ("r-cluster" ,r-cluster) ("r-cowplot" ,r-cowplot) - ("r-dosnow" ,r-dosnow) - ("r-dplyr" ,r-dplyr) - ("r-dtw" ,r-dtw) ("r-fitdistrplus" ,r-fitdistrplus) - ("r-foreach" ,r-foreach) - ("r-fpc" ,r-fpc) + ("r-future" ,r-future) + ("r-future-apply" ,r-future-apply) ("r-ggplot2" ,r-ggplot2) + ("r-ggrepel" ,r-ggrepel) ("r-ggridges" ,r-ggridges) - ("r-gplots" ,r-gplots) - ("r-hdf5r" ,r-hdf5r) - ("r-hmisc" ,r-hmisc) - ("r-httr" ,r-httr) ("r-ica" ,r-ica) ("r-igraph" ,r-igraph) ("r-irlba" ,r-irlba) - ("r-lars" ,r-lars) + ("r-kernsmooth" ,r-kernsmooth) ("r-lmtest" ,r-lmtest) ("r-mass" ,r-mass) ("r-matrix" ,r-matrix) ("r-metap" ,r-metap) - ("r-mixtools" ,r-mixtools) ("r-pbapply" ,r-pbapply) ("r-plotly" ,r-plotly) ("r-png" ,r-png) @@ -9674,12 +9476,14 @@ and irregular enzymatic cleavages, mass measurement accuracy, etc.") ("r-rcpp" ,r-rcpp) ("r-rcppeigen" ,r-rcppeigen) ("r-rcppprogress" ,r-rcppprogress) - ("r-reshape2" ,r-reshape2) ("r-reticulate" ,r-reticulate) + ("r-rlang" ,r-rlang) ("r-rocr" ,r-rocr) + ("r-rsvd" ,r-rsvd) ("r-rtsne" ,r-rtsne) + ("r-scales" ,r-scales) + ("r-sctransform" ,r-sctransform) ("r-sdmtools" ,r-sdmtools) - ("r-tidyr" ,r-tidyr) ("r-tsne" ,r-tsne))) (home-page "http://www.satijalab.org/seurat") (synopsis "Seurat is an R toolkit for single cell genomics") @@ -10026,14 +9830,14 @@ originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2): (define-public r-ensembldb (package (name "r-ensembldb") - (version "2.6.7") + (version "2.6.8") (source (origin (method url-fetch) (uri (bioconductor-uri "ensembldb" version)) (sha256 (base32 - "1wqq0m1fgvgkzq5zr2s9cj2s7qkg9lx3dwwsqixzs5fn52p4dn7f")))) + "0gijx2l2y00h6gfj3gfr7rd4vva6qf2vkfdfy5gdmvqlxy84ka38")))) (build-system r-build-system) (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi) @@ -10152,6 +9956,16 @@ effort and encourages consistency.") (base32 "0wq49qqzkcn8s19xgaxf2s1j1a563d7pbhhvris6fhxfdjsz4934")))) (build-system r-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + ;; See https://github.com/tengfei/ggbio/issues/117 + ;; This fix will be included in the next release. + (add-after 'unpack 'fix-typo + (lambda _ + (substitute* "R/GGbio-class.R" + (("fechable") "fetchable")) + #t))))) (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi) ("r-annotationfilter" ,r-annotationfilter) @@ -10352,14 +10166,14 @@ by Ernst and Kellis.") (define-public r-ldblock (package (name "r-ldblock") - (version "1.12.0") + (version "1.12.1") (source (origin (method url-fetch) (uri (bioconductor-uri "ldblock" version)) (sha256 (base32 - "0xbf4pmhrk5fnd1iz5wzjvdr75v114bwpznhcig4wiqmxc27sips")))) + "01lf74pby7si2g3kgc10qzr6lkcbigqcgqs2j3anc38vzxv0zhwv")))) (build-system r-build-system) (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics) @@ -10384,14 +10198,14 @@ defining LD blocks.") (define-public r-gqtlstats (package (name "r-gqtlstats") - (version "1.14.0") + (version "1.14.1") (source (origin (method url-fetch) (uri (bioconductor-uri "gQTLstats" version)) (sha256 (base32 - "1sg9kw59dlayj7qxql9pd93d4hmml504sa3kkfpzfh3xri7m5pxf")))) + "1rkbnb3h02fdksc4nacqvmq4jgbj9fz4hm7j51yr2ggcgcykwraa")))) (properties `((upstream-name . "gQTLstats"))) (build-system r-build-system) (propagated-inputs @@ -10625,14 +10439,14 @@ block processing.") (define-public r-rhdf5lib (package (name "r-rhdf5lib") - (version "1.4.2") + (version "1.4.3") (source (origin (method url-fetch) (uri (bioconductor-uri "Rhdf5lib" version)) (sha256 (base32 - "06bxd3wz8lrvh2hzvmjpdv4lvzj5lz9353bw5b3zb98cb8w9r2j5")) + "0hjhjvg2kss71fkmxlbgnyyy1agwzgq57rxkgkm4riw82x2rvw7q")) (modules '((guix build utils))) (snippet '(begin @@ -10861,14 +10675,14 @@ memory usage and processing time is minimized.") (define-public r-phangorn (package (name "r-phangorn") - (version "2.4.0") + (version "2.5.3") (source (origin (method url-fetch) (uri (cran-uri "phangorn" version)) (sha256 (base32 - "0xc8k552nxczy19jr0xjjagrzc8x6lafasgk2c099ls8bc1yml1i")))) + "1bv86yfk5r015s7ij6v4zz7bagwrw9m13yfs5853drxb19d5h1m3")))) (build-system r-build-system) (propagated-inputs `(("r-ape" ,r-ape) @@ -11075,23 +10889,31 @@ with narrow binding events such as transcription factor ChIP-seq.") (define-public trim-galore (package (name "trim-galore") - (version "0.4.5") + (version "0.6.1") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/FelixKrueger/TrimGalore.git") (commit version))) - (file-name (string-append name "-" version "-checkout")) + (file-name (git-file-name name version)) (sha256 (base32 - "0x5892l48c816pf00wmnz5vq0zq6170d3xc8zrxncd4jcz7h1p71")))) + "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv")))) (build-system gnu-build-system) (arguments `(#:tests? #f ; no tests #:phases (modify-phases %standard-phases - (delete 'configure) + (replace 'configure + (lambda _ + ;; Trim Galore tries to figure out what version of Python + ;; cutadapt is using by looking at the shebang. Of course that + ;; doesn't work, because cutadapt is wrapped in a shell script. + (substitute* "trim_galore" + (("my \\$python_return.*") + "my $python_return = \"Python 3.999\";\n")) + #t)) (delete 'build) (add-after 'unpack 'hardcode-tool-references (lambda* (#:key inputs #:allow-other-keys) @@ -11100,14 +10922,18 @@ with narrow binding events such as transcription factor ChIP-seq.") (string-append "$path_to_cutadapt = '" (assoc-ref inputs "cutadapt") "/bin/cutadapt'")) - (("\\| gzip") - (string-append "| " + (("\\$compression_path = \"gzip\"") + (string-append "$compression_path = \"" (assoc-ref inputs "gzip") - "/bin/gzip")) + "/bin/gzip\"")) (("\"gunzip") (string-append "\"" (assoc-ref inputs "gzip") - "/bin/gunzip"))) + "/bin/gunzip")) + (("\"pigz") + (string-append "\"" + (assoc-ref inputs "pigz") + "/bin/pigz"))) #t)) (replace 'install (lambda* (#:key outputs #:allow-other-keys) @@ -11119,6 +10945,7 @@ with narrow binding events such as transcription factor ChIP-seq.") (inputs `(("gzip" ,gzip) ("perl" ,perl) + ("pigz" ,pigz) ("cutadapt" ,cutadapt))) (native-inputs `(("unzip" ,unzip))) @@ -12220,6 +12047,23 @@ graphs. This library makes it easy to work with @file{.loom} files for single-cell RNA-seq data.") (license license:bsd-3))) +;; pigx-scrnaseq does not work with the latest version of loompy. +(define-public python-loompy-for-pigx-scrnaseq + (package (inherit python-loompy) + (name "python-loompy") + (version "2.0.3") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/linnarsson-lab/loompy.git") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0pjyl532pl8sbv71yci6h0agchn0naw2qjcwj50n6afrsahbsag3")))) + ;; There are none. + (arguments '(#:tests? #f)))) + ;; We cannot use the latest commit because it requires Java 9. (define-public java-forester (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6") @@ -12833,7 +12677,7 @@ expression report comparing samples in an easily configurable manner.") (define-public pigx-chipseq (package (name "pigx-chipseq") - (version "0.0.31") + (version "0.0.40") (source (origin (method url-fetch) (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/" @@ -12841,7 +12685,7 @@ expression report comparing samples in an easily configurable manner.") "/pigx_chipseq-" version ".tar.gz")) (sha256 (base32 - "0l3vd9xwqzap3mmyj8xwqp84kj7scbq308diqnwg2albphl75xqs")))) + "0y9x62cfwzhsp82imnawyamxp58bcb00yjxdy44spylqnjdlsaj8")))) (build-system gnu-build-system) ;; parts of the tests rely on access to the network (arguments '(#:tests? #f)) @@ -12990,7 +12834,7 @@ methylation and segmentation.") ("python-pandas" ,python-pandas) ("python-magic" ,python-magic) ("python-numpy" ,python-numpy) - ("python-loompy" ,python-loompy) + ("python-loompy" ,python-loompy-for-pigx-scrnaseq) ("ghc-pandoc" ,ghc-pandoc) ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc) ("samtools" ,samtools) @@ -13461,31 +13305,57 @@ in RNA-seq data.") (define-public python-scanpy (package (name "python-scanpy") - (version "1.2.2") + (version "1.4") + ;; Fetch from git because the pypi tarball does not include tests. (source (origin - (method url-fetch) - (uri (pypi-uri "scanpy" version)) + (method git-fetch) + (uri (git-reference + (url "https://github.com/theislab/scanpy.git") + (commit version))) + (file-name (git-file-name name version)) (sha256 (base32 - "1ak7bxms5a0yvf65prppq2g38clkv7c7jnjbnfpkh3xxv7q512jz")))) + "0zn6x6c0cnm1a20i6isigwb51g3pr9zpjk8r1minjqnxi5yc9pm4")))) (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (replace 'check + (lambda* (#:key inputs #:allow-other-keys) + ;; These tests require Internet access. + (delete-file-recursively "scanpy/tests/notebooks") + (delete-file "scanpy/tests/test_clustering.py") + + ;; TODO: I can't get the plotting tests to work, even with Xvfb. + (delete-file "scanpy/tests/test_plotting.py") + (delete-file "scanpy/tests/test_preprocessing.py") + (delete-file "scanpy/tests/test_read_10x.py") + + (setenv "PYTHONPATH" + (string-append (getcwd) ":" + (getenv "PYTHONPATH"))) + (invoke "pytest") + #t))))) (propagated-inputs `(("python-anndata" ,python-anndata) + ("python-h5py" ,python-h5py) ("python-igraph" ,python-igraph) - ("python-numba" ,python-numba) ("python-joblib" ,python-joblib) + ("python-louvain" ,python-louvain) + ("python-matplotlib" ,python-matplotlib) ("python-natsort" ,python-natsort) ("python-networkx" ,python-networkx) - ("python-statsmodels" ,python-statsmodels) - ("python-scikit-learn" ,python-scikit-learn) - ("python-matplotlib" ,python-matplotlib) + ("python-numba" ,python-numba) ("python-pandas" ,python-pandas) + ("python-scikit-learn" ,python-scikit-learn) ("python-scipy" ,python-scipy) ("python-seaborn" ,python-seaborn) - ("python-h5py" ,python-h5py) + ("python-statsmodels" ,python-statsmodels) ("python-tables" ,python-tables))) - (home-page "http://github.com/theislab/scanpy") + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "https://github.com/theislab/scanpy") (synopsis "Single-Cell Analysis in Python.") (description "Scanpy is a scalable toolkit for analyzing single-cell gene expression data. It includes preprocessing, visualization, clustering, @@ -13494,6 +13364,38 @@ Python-based implementation efficiently deals with datasets of more than one million cells.") (license license:bsd-3))) +(define-public python-bbknn + (package + (name "python-bbknn") + (version "1.3.1") + (source + (origin + (method url-fetch) + (uri (pypi-uri "bbknn" version)) + (sha256 + (base32 + "1qgdganvj3lyxj84v7alm23b9vqhwpn8z0115qndpnpy90qxynwz")))) + (build-system python-build-system) + (propagated-inputs + `(("python-annoy" ,python-annoy) + ("python-cython" ,python-cython) + ("python-faiss" ,python-faiss) + ("python-numpy" ,python-numpy) + ("python-scanpy" ,python-scanpy))) + (home-page "https://github.com/Teichlab/bbknn") + (synopsis "Batch balanced KNN") + (description "BBKNN is a batch effect removal tool that can be directly +used in the Scanpy workflow. It serves as an alternative to +@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour +graph for subsequent use in clustering, pseudotime and UMAP visualisation. If +technical artifacts are present in the data, they will make it challenging to +link corresponding cell types across different batches. BBKNN actively +combats this effect by splitting your data into batches and finding a smaller +number of neighbours for each cell within each of the groups. This helps +create connections between analogous cells in different batches without +altering the counts or PCA space.") + (license license:expat))) + (define-public gffcompare (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41") (revision "1")) @@ -14661,3 +14563,227 @@ overlapping paired-ended reads into (longer) consensus sequences. Additionally, the AdapterRemoval may be used to recover a consensus adapter sequence for paired-ended data, for which this information is not available.") (license license:gpl3+))) + +(define-public pplacer + (let ((commit "807f6f3")) + (package + (name "pplacer") + ;; The commit should be updated with each version change. + (version "1.1.alpha19") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/matsen/pplacer.git") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn")))) + (build-system ocaml-build-system) + (arguments + `(#:modules ((guix build ocaml-build-system) + (guix build utils) + (ice-9 ftw)) + #:phases + (modify-phases %standard-phases + (delete 'configure) + (add-after 'unpack 'fix-build-with-latest-ocaml + (lambda _ + (substitute* "myocamlbuild.ml" + (("dep \\[\"c_pam\"\\]" m) + (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n" + m)) + (("let run_and_read" m) + (string-append " +let split s ch = + let x = ref [] in + let rec go s = + let pos = String.index s ch in + x := (String.before s pos)::!x; + go (String.after s (pos + 1)) + in + try go s + with Not_found -> !x +let split_nl s = split s '\\n' +let before_space s = + try String.before s (String.index s ' ') + with Not_found -> s + +" m)) + (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m) + (string-append "List.map before_space (split_nl & " m ")")) + ((" blank_sep_strings &") "") + ((" Lexing.from_string &") "")) + #t)) + (add-after 'unpack 'replace-bundled-cddlib + (lambda* (#:key inputs #:allow-other-keys) + (let* ((cddlib-src (assoc-ref inputs "cddlib-src")) + (local-dir "cddlib_guix")) + (mkdir local-dir) + (with-directory-excursion local-dir + (invoke "tar" "xvf" cddlib-src)) + (let ((cddlib-src-folder + (string-append local-dir "/" + (list-ref (scandir local-dir) 2) + "/lib-src"))) + (for-each make-file-writable (find-files "cdd_src" ".*")) + (for-each + (lambda (file) + (copy-file file + (string-append "cdd_src/" (basename file)))) + (find-files cddlib-src-folder ".*[ch]$"))) + #t))) + (add-after 'unpack 'fix-makefile + (lambda _ + ;; Remove system calls to 'git'. + (substitute* "Makefile" + (("^DESCRIPT:=pplacer-.*") + (string-append + "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n"))) + (substitute* "myocamlbuild.ml" + (("git describe --tags --long .*\\\" with") + (string-append + "echo -n v" ,version "-" ,commit "\" with"))) + #t)) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (bin (string-append out "/bin"))) + (copy-recursively "bin" bin)) + #t))))) + (inputs + `(("zlib" ,zlib "static") + ("gsl" ,gsl) + ("ocaml-ounit" ,ocaml-ounit) + ("ocaml-batteries" ,ocaml-batteries) + ("ocaml-camlzip" ,camlzip) + ("ocaml-csv" ,ocaml-csv) + ("ocaml-sqlite3" ,ocaml-sqlite3) + ("ocaml-xmlm" ,ocaml-xmlm) + ("ocaml-mcl" ,ocaml-mcl) + ("ocaml-gsl" ,ocaml-gsl-1))) + (native-inputs + `(("cddlib-src" ,(package-source cddlib)) + ("ocamlbuild" ,ocamlbuild) + ("pkg-config" ,pkg-config))) + (propagated-inputs + `(("pplacer-scripts" ,pplacer-scripts))) + (synopsis "Phylogenetic placement of biological sequences") + (description + "Pplacer places query sequences on a fixed reference phylogenetic tree +to maximize phylogenetic likelihood or posterior probability according to a +reference alignment. Pplacer is designed to be fast, to give useful +information about uncertainty, and to offer advanced visualization and +downstream analysis.") + (home-page "http://matsen.fhcrc.org/pplacer") + (license license:gpl3)))) + +;; This package is installed alongside 'pplacer'. It is a separate package so +;; that it can use the python-build-system for the scripts that are +;; distributed alongside the main OCaml binaries. +(define pplacer-scripts + (package + (inherit pplacer) + (name "pplacer-scripts") + (build-system python-build-system) + (arguments + `(#:python ,python-2 + #:phases + (modify-phases %standard-phases + (add-after 'unpack 'enter-scripts-dir + (lambda _ (chdir "scripts") #t)) + (replace 'check + (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t)) + (add-after 'install 'wrap-executables + (lambda* (#:key inputs outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (bin (string-append out "/bin"))) + (let ((path (string-append + (assoc-ref inputs "hmmer") "/bin:" + (assoc-ref inputs "infernal") "/bin"))) + (display path) + (wrap-program (string-append bin "/refpkg_align.py") + `("PATH" ":" prefix (,path)))) + (let ((path (string-append + (assoc-ref inputs "hmmer") "/bin"))) + (wrap-program (string-append bin "/hrefpkg_query.py") + `("PATH" ":" prefix (,path))))) + #t))))) + (inputs + `(("infernal" ,infernal) + ("hmmer" ,hmmer))) + (propagated-inputs + `(("python-biopython" ,python2-biopython) + ("taxtastic" ,taxtastic))) + (synopsis "Pplacer Python scripts"))) + +(define-public python2-checkm-genome + (package + (name "python2-checkm-genome") + (version "1.0.13") + (source + (origin + (method url-fetch) + (uri (pypi-uri "checkm-genome" version)) + (sha256 + (base32 + "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz")))) + (build-system python-build-system) + (arguments + `(#:python ,python-2 + #:tests? #f)) ; some tests are interactive + (propagated-inputs + `(("python-dendropy" ,python2-dendropy) + ("python-matplotlib" ,python2-matplotlib) + ("python-numpy" ,python2-numpy) + ("python-pysam" ,python2-pysam) + ("python-scipy" ,python2-scipy))) + (home-page "http://pypi.python.org/pypi/checkm/") + (synopsis "Assess the quality of putative genome bins") + (description + "CheckM provides a set of tools for assessing the quality of genomes +recovered from isolates, single cells, or metagenomes. It provides robust +estimates of genome completeness and contamination by using collocated sets of +genes that are ubiquitous and single-copy within a phylogenetic lineage. +Assessment of genome quality can also be examined using plots depicting key +genomic characteristics (e.g., GC, coding density) which highlight sequences +outside the expected distributions of a typical genome. CheckM also provides +tools for identifying genome bins that are likely candidates for merging based +on marker set compatibility, similarity in genomic characteristics, and +proximity within a reference genome.") + (license license:gpl3+))) + +(define-public umi-tools + (package + (name "umi-tools") + (version "1.0.0") + (source + (origin + (method url-fetch) + (uri (pypi-uri "umi_tools" version)) + (sha256 + (base32 + "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a")))) + (build-system python-build-system) + (inputs + `(("python-setuptools" ,python-setuptools) + ("python-pandas" ,python-pandas) + ("python-future" ,python-future) + ("python-scipy" ,python-scipy) + ("python-matplotlib" ,python-matplotlib) + ("python-regex" ,python-regex) + ("python-pysam" ,python-pysam))) + (native-inputs + `(("python-setuptools" ,python-setuptools) + ("python-cython" ,python-cython))) + (home-page "https://github.com/CGATOxford/UMI-tools") + (synopsis "Tools for analyzing unique modular identifiers") + (description "This package provides tools for dealing with @dfn{Unique +Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in +genetic sequences. There are six tools: the @code{extract} and +@code{whitelist} commands are used to prepare a fastq containg UMIs @code{+/-} +cell barcodes for alignment. The remaining commands, @code{group}, +@code{dedup}, and @{count}/@code{count_tab}, are used to identify PCR +duplicates using the UMIs and perform different levels of analysis depending +on the needs of the user.") + (license license:expat))) |