aboutsummaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm430
1 files changed, 407 insertions, 23 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 430551887d..d91ff13b46 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -104,6 +104,7 @@
#:use-module (gnu packages java)
#:use-module (gnu packages java-compression)
#:use-module (gnu packages jemalloc)
+ #:use-module (gnu packages jupyter)
#:use-module (gnu packages linux)
#:use-module (gnu packages lisp-xyz)
#:use-module (gnu packages logging)
@@ -4534,7 +4535,7 @@ The main functions of FastQC are:
(define-public fastp
(package
(name "fastp")
- (version "0.14.1")
+ (version "0.20.1")
(source
(origin
(method git-fetch)
@@ -4544,19 +4545,18 @@ The main functions of FastQC are:
(file-name (git-file-name name version))
(sha256
(base32
- "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
+ "0ly8mxdvrcy23jwxyppysx3dhb1lwsqhfbgpyvargxhfk6k700x4"))))
(build-system gnu-build-system)
(arguments
`(#:tests? #f ; there are none
#:make-flags
- (list (string-append "BINDIR=" (assoc-ref %outputs "out") "/bin"))
+ (list (string-append "PREFIX=" (assoc-ref %outputs "out")))
#:phases
(modify-phases %standard-phases
(delete 'configure)
(add-before 'install 'create-target-dir
(lambda* (#:key outputs #:allow-other-keys)
- (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
- #t)))))
+ (mkdir-p (string-append (assoc-ref outputs "out") "/bin")))))))
(inputs
`(("zlib" ,zlib)))
(home-page "https://github.com/OpenGene/fastp/")
@@ -10381,7 +10381,7 @@ once. This package provides tools to perform Drop-seq analyses.")
(define-public pigx-rnaseq
(package
(name "pigx-rnaseq")
- (version "0.0.18")
+ (version "0.0.19")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
@@ -10389,7 +10389,7 @@ once. This package provides tools to perform Drop-seq analyses.")
"/pigx_rnaseq-" version ".tar.gz"))
(sha256
(base32
- "1622l6grmsk0wm859rvllngx29q3v16jjvzcdq2bmrlamccrj82y"))))
+ "1ja3bda1appxrzbfy7wp7khy30mm7lic8xbq3gkbpc5bld3as9cm"))))
(build-system gnu-build-system)
(arguments
`(#:parallel-tests? #f ; not supported
@@ -11387,38 +11387,53 @@ implementation differs in these ways:
(define-public python-scanpy
(package
(name "python-scanpy")
- (version "1.7.2")
+ (version "1.8.1")
(source
(origin
- (method url-fetch)
- (uri (pypi-uri "scanpy" version))
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/theislab/scanpy")
+ (commit version)))
+ (file-name (git-file-name name version))
(sha256
(base32
- "0c66adnfizsyk0h8bv2yhmay876z0klpxwpn4z6m71wly7yplpmd"))))
+ "0w1qmv3djqi8q0sn5hv34ivzs157fwjjb9nflfnagnhpxmw8vx5g"))))
(build-system python-build-system)
(arguments
`(#:phases
(modify-phases %standard-phases
+ (replace 'build
+ (lambda _
+ (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" ,version)
+ ;; ZIP does not support timestamps before 1980.
+ (setenv "SOURCE_DATE_EPOCH" "315532800")
+ (invoke "flit" "build")))
+ (replace 'install
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (add-installed-pythonpath inputs outputs)
+ (let ((out (assoc-ref outputs "out")))
+ (for-each (lambda (wheel)
+ (format #true wheel)
+ (invoke "python" "-m" "pip" "install"
+ wheel (string-append "--prefix=" out)))
+ (find-files "dist" "\\.whl$")))))
(replace 'check
(lambda* (#:key inputs #:allow-other-keys)
;; These tests require Internet access.
(delete-file-recursively "scanpy/tests/notebooks")
(delete-file "scanpy/tests/test_clustering.py")
(delete-file "scanpy/tests/test_datasets.py")
+ (delete-file "scanpy/tests/test_score_genes.py")
(delete-file "scanpy/tests/test_highly_variable_genes.py")
;; TODO: I can't get the plotting tests to work, even with Xvfb.
- (delete-file "scanpy/tests/test_plotting.py")
+ (delete-file "scanpy/tests/test_embedding_plots.py")
(delete-file "scanpy/tests/test_preprocessing.py")
(delete-file "scanpy/tests/test_read_10x.py")
- ;; The following tests need anndata.tests, which aren't included
- ;; in the final python-anndata package.
- (delete-file "scanpy/tests/test_combat.py")
- (delete-file "scanpy/tests/test_embedding_plots.py")
- (delete-file "scanpy/tests/test_normalization.py")
- (delete-file "scanpy/tests/test_pca.py")
- (delete-file "scanpy/tests/external/test_scrublet.py")
+ ;; TODO: these fail with TypingError and "Use of unsupported
+ ;; NumPy function 'numpy.split'".
+ (delete-file "scanpy/tests/test_metrics.py")
;; The following tests requires 'scanorama', which isn't
;; packaged yet.
@@ -11426,8 +11441,24 @@ implementation differs in these ways:
(setenv "PYTHONPATH"
(string-append (getcwd) ":"
+ (assoc-ref inputs "python-anndata:source") ":"
(getenv "PYTHONPATH")))
- (invoke "pytest"))))))
+ (invoke "pytest" "-vv"
+ "-k"
+ ;; Plot tests that fail.
+ (string-append "not test_dotplot_matrixplot_stacked_violin"
+ " and not test_violin_without_raw"
+ " and not test_correlation"
+ " and not test_scatterplots"
+ " and not test_scatter_embedding_add_outline_vmin_vmax_norm"
+ " and not test_paga"
+ " and not test_paga_compare"
+
+ ;; These try to connect to the network
+ " and not test_plot_rank_genes_groups_gene_symbols"
+ " and not test_pca_chunked"
+ " and not test_pca_sparse"
+ " and not test_pca_reproducible")))))))
(propagated-inputs
`(("python-anndata" ,python-anndata)
("python-h5py" ,python-h5py)
@@ -11445,16 +11476,19 @@ implementation differs in these ways:
("python-scikit-learn" ,python-scikit-learn)
("python-scipy" ,python-scipy)
("python-seaborn" ,python-seaborn)
+ ("python-sinfo" ,python-sinfo)
("python-statsmodels" ,python-statsmodels)
("python-tables" ,python-tables)
("python-pytoml" ,python-pytoml)
("python-tqdm" ,python-tqdm)
("python-umap-learn" ,python-umap-learn)))
(native-inputs
- `(("python-leidenalg" ,python-leidenalg)
+ `(;; This package needs anndata.tests, which is not installed.
+ ("python-anndata:source" ,(package-source python-anndata))
+ ("python-flit" ,python-flit)
+ ("python-leidenalg" ,python-leidenalg)
("python-pytest" ,python-pytest)
- ("python-setuptools-scm" ,python-setuptools-scm)
- ("python-sinfo" ,python-sinfo)))
+ ("python-setuptools-scm" ,python-setuptools-scm)))
(home-page "https://github.com/theislab/scanpy")
(synopsis "Single-Cell Analysis in Python.")
(description "Scanpy is a scalable toolkit for analyzing single-cell gene
@@ -14203,6 +14237,32 @@ sequencing (e.g. mapping or base/indel alignment uncertainty), which are
usually ignored by other methods or only used for filtering.")
(license license:expat)))
+(define-public ivar
+ (package
+ (name "ivar")
+ (version "1.3.1")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/andersen-lab/ivar")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "044xa0hm3b8fga64csrdx05ih8w7kwmvcdrdrhkg8j11ml4bi4xv"))))
+ (build-system gnu-build-system)
+ (inputs
+ `(("htslib" ,htslib)
+ ("zlib" ,zlib)))
+ (native-inputs
+ `(("autoconf" ,autoconf)
+ ("automake" ,automake)))
+ (home-page "https://andersen-lab.github.io/ivar/html/")
+ (synopsis "Tools for amplicon-based sequencing")
+ (description "iVar is a computational package that contains functions
+broadly useful for viral amplicon-based sequencing.")
+ (license license:gpl3+)))
+
(define-public python-pyliftover
(package
(name "python-pyliftover")
@@ -14678,6 +14738,263 @@ produced by Oxford Nanopore Technologies’ MinION, GridION or PromethION
instruments, or Pacific Biosciences RSII or Sequel sequencers.")
(license license:expat)))
+(define-public python-strawc
+ (package
+ (name "python-strawc")
+ (version "0.0.2.1")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "strawC" version))
+ (sha256
+ (base32
+ "1z1gy8n56lhriy6hdkh9r82ndikndipq2cy2wh8q185qig4rimr6"))))
+ (build-system python-build-system)
+ (inputs
+ `(("curl" ,curl)
+ ("zlib" ,zlib)))
+ (propagated-inputs
+ `(("pybind11" ,pybind11)))
+ (home-page "https://github.com/aidenlab/straw")
+ (synopsis "Stream data from .hic files")
+ (description "Straw is a library that allows rapid streaming of contact
+data from @file{.hic} files. This package provides Python bindings.")
+ (license license:expat)))
+
+(define-public python-pybbi
+ (package
+ (name "python-pybbi")
+ (version "0.3.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "pybbi" version))
+ (sha256
+ (base32
+ "1hvy2f28i2b41l1pq15vciqbj538n0lichp8yr6413jmgg06xdsk"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:tests? #false ; tests require network access
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'set-cc
+ (lambda _ (setenv "CC" "gcc")))
+ (replace 'check
+ (lambda* (#:key inputs outputs tests? #:allow-other-keys)
+ (when tests?
+ (add-installed-pythonpath inputs outputs)
+ (copy-recursively "tests" "/tmp/tests")
+ (with-directory-excursion "/tmp/tests"
+ (invoke "python" "-m" "pytest" "-v"))))))))
+ (native-inputs
+ `(("pkg-config" ,pkg-config)
+ ("python-pkgconfig" ,python-pkgconfig)
+ ("python-pytest" ,python-pytest)))
+ (inputs
+ `(("libpng" ,libpng)
+ ("openssl" ,openssl)
+ ("zlib" ,zlib)))
+ (propagated-inputs
+ `(("python-cython" ,python-cython)
+ ("python-numpy" ,python-numpy)
+ ("python-pandas" ,python-pandas)
+ ("python-six" ,python-six)))
+ (home-page "https://github.com/nvictus/pybbi")
+ (synopsis "Python bindings to UCSC Big Binary file library")
+ (description
+ "This package provides Python bindings to the UCSC Big
+Binary (bigWig/bigBed) file library. This provides read-level access to local
+and remote bigWig and bigBed files but no write capabilities. The main
+feature is fast retrieval of range queries into numpy arrays.")
+ (license license:expat)))
+
+(define-public python-dna-features-viewer
+ (package
+ (name "python-dna-features-viewer")
+ (version "3.0.3")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "dna_features_viewer" version))
+ (sha256
+ (base32
+ "0vci6kg2id6r6rh3cifq7ccnh7j0mb8iqg3hji6rva0ayrdqzafc"))))
+ (build-system python-build-system)
+ (arguments '(#:tests? #false)) ; there are none
+ (propagated-inputs
+ `(("python-biopython" ,python-biopython)
+ ("python-matplotlib" ,python-matplotlib)))
+ (home-page
+ "https://github.com/Edinburgh-Genome-Foundry/DnaFeaturesViewer")
+ (synopsis "Plot features from DNA sequences")
+ (description
+ "DNA Features Viewer is a Python library to visualize DNA features,
+e.g. from GenBank or Gff files, or Biopython SeqRecords.")
+ (license license:expat)))
+
+(define-public python-coolbox
+ (package
+ (name "python-coolbox")
+ (version "0.3.8")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "coolbox" version))
+ (sha256
+ (base32
+ "0gqp76285w9klswr47y6kxbzwhv033b26jfa179kccfhiaq5p2xa"))))
+ (build-system python-build-system)
+ (arguments '(#:tests? #false)) ; there are none
+ (inputs
+ `(("pybind11" ,pybind11)))
+ (propagated-inputs
+ `(("python-cooler" ,python-cooler)
+ ("python-dna-features-viewer" ,python-dna-features-viewer)
+ ("python-fire" ,python-fire)
+ ("python-h5py" ,python-h5py)
+ ("python-intervaltree" ,python-intervaltree)
+ ("python-ipywidgets" ,python-ipywidgets)
+ ("jupyter" ,jupyter)
+ ("python-matplotlib" ,python-matplotlib)
+ ("python-nbformat" ,python-nbformat)
+ ("python-numpy" ,python-numpy)
+ ("python-numpydoc" ,python-numpydoc)
+ ("python-pandas" ,python-pandas)
+ ("python-pybbi" ,python-pybbi)
+ ("python-pytest" ,python-pytest)
+ ("python-scipy" ,python-scipy)
+ ("python-statsmodels" ,python-statsmodels)
+ ("python-strawc" ,python-strawc)
+ ("python-svgutils" ,python-svgutils)
+ ("python-termcolor" ,python-termcolor)
+ ("python-voila" ,python-voila)))
+ (home-page "https://github.com/GangCaoLab/CoolBox")
+ (synopsis "Genomic data visualization toolkit")
+ (description
+ "CoolBox is a toolkit for visual analysis of genomics data. It aims to
+be highly compatible with the Python ecosystem, easy to use and highly
+customizable with a well-designed user interface. It can be used in various
+visualization situations, for example, to produce high-quality genome track
+plots or fetch commonly used genomic data files with a Python script or command
+line, interactively explore genomic data within Jupyter environment or web
+browser.")
+ (license license:gpl3+)))
+
+(define-public scregseg
+ (package
+ (name "scregseg")
+ (version "0.1.1")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/BIMSBbioinfo/scregseg")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1k8hllr5if6k2mm2zj391fv40sfc008cjm04l9vgfsdppb80i112"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:tests? #false ; tests require network access
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'do-not-fail-to-find-sklearn
+ (lambda _
+ ;; XXX: I have no idea why it cannot seem to find sklearn.
+ (substitute* "setup.py"
+ (("'sklearn',") "")))))))
+ (native-inputs
+ `(("python-cython" ,python-cython)))
+ (propagated-inputs
+ `(("python-scikit-learn" ,python-scikit-learn)
+ ("python-scipy" ,python-scipy)
+ ("python-numpy" ,python-numpy)
+ ("python-hmmlearn" ,python-hmmlearn)
+ ("python-pandas" ,python-pandas)
+ ("python-numba" ,python-numba)
+ ("python-anndata" ,python-anndata)
+ ("python-scanpy" ,python-scanpy)
+ ("python-pybedtools" ,python-pybedtools)
+ ("python-pysam" ,python-pysam)
+ ("python-matplotlib" ,python-matplotlib)
+ ("python-seaborn" ,python-seaborn)
+ ("python-coolbox" ,python-coolbox)))
+ (home-page "https://github.com/BIMSBbioinfo/scregseg")
+ (synopsis "Single-cell regulatory landscape segmentation")
+ (description "Scregseg (Single-Cell REGulatory landscape SEGmentation) is a
+tool that facilitates the analysis of single cell ATAC-seq data by an
+HMM-based segmentation algorithm. Scregseg uses an HMM with
+Dirichlet-Multinomial emission probabilities to segment the genome either
+according to distinct relative cross-cell accessibility profiles or (after
+collapsing the single-cell tracks to pseudo-bulk tracks) to capture distinct
+cross-cluster accessibility profiles.")
+ (license license:gpl3+)))
+
+(define-public megadepth
+ (package
+ (name "megadepth")
+ (version "1.1.1")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/ChristopherWilks/megadepth")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0hj69d2dgmk2zwgazik7xzc04fxxlk93p888kpgc52fmhd95qph7"))))
+ (build-system cmake-build-system)
+ (arguments
+ `(#:tests? #false ; some tests seem to require connection to
+ ; www.ebi.ac.uk; this may be caused by htslib.
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'prepare-CMakeLists.txt
+ (lambda _
+ (rename-file "CMakeLists.txt.ci" "CMakeLists.txt")
+ (substitute* "CMakeLists.txt"
+ (("`cat ../VERSION`") ,version)
+ (("target_link_libraries\\(megadepth_static") "#")
+ (("target_link_libraries\\(megadepth_statlib") "#")
+ (("add_executable\\(megadepth_static") "#")
+ (("add_executable\\(megadepth_statlib") "#"))
+
+ (substitute* "tests/test.sh"
+ ;; Disable remote test
+ (("./megadepth http://stingray.cs.jhu.edu/data/temp/test.bam") "#")
+ ;; Prior to installation the binary's name differs from what
+ ;; the test script assumes.
+ (("./megadepth") "../build/megadepth_dynamic"))))
+ (replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (with-directory-excursion "../source"
+ (invoke "bash" "tests/test.sh" "use-local-test-data")))))
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+ (mkdir-p bin)
+ (copy-file "megadepth_dynamic"
+ (string-append bin "/megadepth"))))))))
+ (native-inputs
+ `(("diffutils" ,diffutils)
+ ("perl" ,perl)
+ ("grep" ,grep)))
+ (inputs
+ `(("curl" ,curl)
+ ("htslib" ,htslib)
+ ("libdeflate" ,libdeflate)
+ ("libbigwig" ,libbigwig)
+ ("zlib" ,zlib)))
+ (home-page "https://github.com/ChristopherWilks/megadepth")
+ (synopsis "BigWig and BAM/CRAM related utilities")
+ (description "Megadepth is an efficient tool for extracting coverage
+related information from RNA and DNA-seq BAM and BigWig files. It supports
+reading whole-genome coverage from BAM files and writing either indexed TSV or
+BigWig files, as well as efficient region coverage summary over intervals from
+both types of files.")
+ (license license:expat)))
+
(define-public r-ascat
(package
(name "r-ascat")
@@ -14737,6 +15054,34 @@ copy number estimation, as described by
@url{doi:10.1016/j.cell.2012.04.023,Nik-Zainal et al.}")
(license license:gpl3)))
+(define-public r-catch
+ (let ((commit "196ddd5a51b1a5f5daa01de53fdaad9b7505e084")
+ (revision "1"))
+ (package
+ (name "r-catch")
+ (version (git-version "1.0" revision commit))
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/zhanyinx/CaTCH")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "11c7f1fc8f57wnwk1hrgr5y814m80zj8gkz5021vxyxy2v02cqgd"))))
+ (build-system r-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'chdir
+ (lambda _ (chdir "CaTCH"))))))
+ (home-page "https://github.com/zhanyinx/CaTCH_R")
+ (synopsis "Call a hierarchy of domains based on Hi-C data")
+ (description "This package allows building the hierarchy of domains
+starting from Hi-C data. Each hierarchical level is identified by a minimum
+value of physical insulation between neighboring domains.")
+ (license license:gpl2+))))
+
(define-public r-spectre
(let ((commit "f6648ab3eb9499300d86502b5d60ec370ae9b61a")
(revision "1"))
@@ -14808,3 +15153,42 @@ copy number estimation, as described by
integration, exploration, and analysis of high-dimensional single-cell
cytometry and imaging data.")
(license license:expat))))
+
+(define-public r-cytonorm
+ (let ((commit "e4b9d343ee65db3c422800f1db3e77c25abde987")
+ (revision "1"))
+ (package
+ (name "r-cytonorm")
+ (version (git-version "0.0.7" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/saeyslab/CytoNorm")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0h2rdy15i4zymd4dv60n5w0frbsdbmzpv99dgm0l2dn041qv7fah"))))
+ (properties `((upstream-name . "CytoNorm")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-cytoml" ,r-cytoml)
+ ("r-dplyr" ,r-dplyr)
+ ("r-emdist" ,r-emdist)
+ ("r-flowcore" ,r-flowcore)
+ ("r-flowsom" ,r-flowsom)
+ ("r-flowworkspace" ,r-flowworkspace)
+ ("r-ggplot2" ,r-ggplot2)
+ ("r-gridextra" ,r-gridextra)
+ ("r-pheatmap" ,r-pheatmap)
+ ("r-stringr" ,r-stringr)))
+ (home-page "https://github.com/saeyslab/CytoNorm")
+ (synopsis "Normalize cytometry data measured across multiple batches")
+ (description
+ "This package can be used to normalize cytometry samples when a control
+sample is taken along in each of the batches. This is done by first
+identifying multiple clusters/cell types, learning the batch effects from the
+control samples and applying quantile normalization on all markers of
+interest.")
+ (license license:gpl2+))))