aboutsummaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm648
1 files changed, 617 insertions, 31 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index c930099179..5a1738b936 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -468,6 +468,47 @@ frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
(license license:gpl3+)))
+(define-public ribodiff
+ (package
+ (name "ribodiff")
+ (version "0.2.2")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append "https://github.com/ratschlab/RiboDiff/"
+ "archive/v" version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz"))
+ (sha256
+ (base32
+ "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:python ,python-2
+ #:phases
+ (modify-phases %standard-phases
+ ;; Generate an installable executable script wrapper.
+ (add-after 'unpack 'patch-setup.py
+ (lambda _
+ (substitute* "setup.py"
+ (("^(.*)packages=.*" line prefix)
+ (string-append line "\n"
+ prefix "scripts=['scripts/TE.py'],\n")))
+ #t)))))
+ (inputs
+ `(("python-numpy" ,python2-numpy)
+ ("python-matplotlib" ,python2-matplotlib)
+ ("python-scipy" ,python2-scipy)
+ ("python-statsmodels" ,python2-statsmodels)))
+ (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
+ (synopsis "Detect translation efficiency changes from ribosome footprints")
+ (description "RiboDiff is a statistical tool that detects the protein
+translational efficiency change from Ribo-Seq (ribosome footprinting) and
+RNA-Seq data. It uses a generalized linear model to detect genes showing
+difference in translational profile taking mRNA abundance into account. It
+facilitates us to decipher the translational regulation that behave
+independently with transcriptional regulation.")
+ (license license:gpl3+)))
+
(define-public bioawk
(package
(name "bioawk")
@@ -2010,7 +2051,7 @@ identify enrichments with functional annotations of the genome.")
(define-public diamond
(package
(name "diamond")
- (version "0.8.31")
+ (version "0.8.34")
(source (origin
(method url-fetch)
(uri (string-append
@@ -2019,7 +2060,7 @@ identify enrichments with functional annotations of the genome.")
(file-name (string-append name "-" version ".tar.gz"))
(sha256
(base32
- "0nh79f4rpgq8vmlga743r7vd0z0ik6spy34f7vfq0v9lcmvfr7xq"))))
+ "0jvr34g346gbz7z1zb9bs0vplivm9p4cxk0lbzklvdpa7g236p39"))))
(build-system cmake-build-system)
(arguments
'(#:tests? #f ; no "check" target
@@ -2726,6 +2767,69 @@ several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
(license license:gpl3+)))
+(define-public hisat2
+ (package
+ (name "hisat2")
+ (version "2.0.5")
+ (source
+ (origin
+ (method url-fetch)
+ ;; FIXME: a better source URL is
+ ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
+ ;; "/downloads/hisat2-" version "-source.zip")
+ ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
+ ;; but it is currently unavailable.
+ (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
+ (file-name (string-append name "-" version ".tar.gz"))
+ (sha256
+ (base32
+ "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:tests? #f ; no check target
+ #:make-flags (list "CC=gcc" "CXX=g++" "allall")
+ #:modules ((guix build gnu-build-system)
+ (guix build utils)
+ (srfi srfi-26))
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'make-deterministic
+ (lambda _
+ (substitute* "Makefile"
+ (("`date`") "0"))
+ #t))
+ (delete 'configure)
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
+ (bin (string-append out "/bin/"))
+ (doc (string-append out "/share/doc/hisat2/")))
+ (for-each
+ (cut install-file <> bin)
+ (find-files "."
+ "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
+ (mkdir-p doc)
+ (install-file "doc/manual.inc.html" doc))
+ #t)))))
+ (native-inputs
+ `(("unzip" ,unzip) ; needed for archive from ftp
+ ("perl" ,perl)
+ ("pandoc" ,ghc-pandoc))) ; for documentation
+ (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
+ (synopsis "Graph-based alignment of genomic sequencing reads")
+ (description "HISAT2 is a fast and sensitive alignment program for mapping
+next-generation sequencing reads (both DNA and RNA) to a population of human
+genomes (as well as to a single reference genome). In addition to using one
+global @dfn{graph FM} (GFM) index that represents a population of human
+genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
+the whole genome. These small indexes, combined with several alignment
+strategies, enable rapid and accurate alignment of sequencing reads. This new
+indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
+ ;; HISAT2 contains files from Bowtie2, which is released under
+ ;; GPLv2 or later. The HISAT2 source files are released under
+ ;; GPLv3 or later.
+ (license license:gpl3+)))
+
(define-public hmmer
(package
(name "hmmer")
@@ -3383,7 +3487,6 @@ that a read originated from a particular isoform.")
(version "3.8.1551")
(source (origin
(method url-fetch/tarbomb)
- (file-name (string-append name "-" version))
(uri (string-append
"http://www.drive5.com/muscle/muscle_src_"
version ".tar.gz"))
@@ -5148,17 +5251,41 @@ sequence.")
(supported-systems '("i686-linux" "x86_64-linux"))
(license license:bsd-3)))
+(define-public r-centipede
+ (package
+ (name "r-centipede")
+ (version "1.2")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "http://download.r-forge.r-project.org/"
+ "src/contrib/CENTIPEDE_" version ".tar.gz"))
+ (sha256
+ (base32
+ "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
+ (build-system r-build-system)
+ (home-page "http://centipede.uchicago.edu/")
+ (synopsis "Predict transcription factor binding sites")
+ (description
+ "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
+of the genome that are bound by particular transcription factors. It starts
+by identifying a set of candidate binding sites, and then aims to classify the
+sites according to whether each site is bound or not bound by a transcription
+factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
+between two different types of motif instances using as much relevant
+information as possible.")
+ (license (list license:gpl2+ license:gpl3+))))
+
(define-public r-vegan
(package
(name "r-vegan")
- (version "2.4-1")
+ (version "2.4-2")
(source
(origin
(method url-fetch)
(uri (cran-uri "vegan" version))
(sha256
(base32
- "0i0c7rc0nzgbysd1nlxzxd2rvy75qcnw3yc7nggzqjzzj5d7yzsd"))))
+ "12wf64izrpq9z3ix7mgm5421mq0xsm8dw5qblvcrz452nfhjf5w9"))))
(build-system r-build-system)
(arguments
`(#:phases
@@ -5610,7 +5737,7 @@ track. The database is exposed as a @code{TxDb} object.")
(define-public vsearch
(package
(name "vsearch")
- (version "2.3.4")
+ (version "2.4.0")
(source
(origin
(method url-fetch)
@@ -5620,31 +5747,12 @@ track. The database is exposed as a @code{TxDb} object.")
(file-name (string-append name "-" version ".tar.gz"))
(sha256
(base32
- "1xyraxmhyx62mxx8z7c8waygvcijwkh48ms1ar60w2cv2y2sn4al"))
- (modules '((guix build utils)))
+ "007q9a50hdw4vs2iajabvbw7qccml4r8cbqzyi5ipkkf42jk3vnr"))
+ (patches (search-patches "vsearch-unbundle-cityhash.patch"))
(snippet
'(begin
- ;; Remove bundled cityhash and '-mtune=native'.
- (substitute* "src/Makefile.am"
- (("^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash \
--O3 -mtune=native -Wall -Wsign-compare")
- (string-append "AM_CXXFLAGS=-lcityhash"
- " -O3 -Wall -Wsign-compare"))
- (("^__top_builddir__bin_vsearch_SOURCES = city.h \\\\")
- "__top_builddir__bin_vsearch_SOURCES = \\")
- (("^city.h \\\\") "\\")
- (("^citycrc.h \\\\") "\\")
- (("^libcityhash_a.*") "")
- (("noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a \
-libcityhash.a")
- "noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a")
- (("__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
-libcpu_sse2.a libcityhash.a")
- "__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
-libcpu_sse2.a -lcityhash"))
- (substitute* "src/vsearch.h"
- (("^\\#include \"city.h\"") "#include <city.h>")
- (("^\\#include \"citycrc.h\"") "#include <citycrc.h>"))
+ ;; Remove bundled cityhash sources. The vsearch source is adjusted
+ ;; for this in the patch.
(delete-file "src/city.h")
(delete-file "src/citycrc.h")
(delete-file "src/city.cc")
@@ -6186,7 +6294,8 @@ names in their natural, rather than lexicographic, order.")
(build-system r-build-system)
(propagated-inputs
`(("r-limma" ,r-limma)
- ("r-locfit" ,r-locfit)))
+ ("r-locfit" ,r-locfit)
+ ("r-statmod" ,r-statmod))) ;for estimateDisp
(home-page "http://bioinf.wehi.edu.au/edgeR")
(synopsis "EdgeR does empirical analysis of digital gene expression data")
(description "This package can do differential expression analysis of
@@ -6746,6 +6855,37 @@ dependencies between GO terms can be implemented and applied.")
genome data packages and support for efficient SNP representation.")
(license license:artistic2.0)))
+(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
+ (package
+ (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
+ (version "0.99.1")
+ (source (origin
+ (method url-fetch)
+ ;; We cannot use bioconductor-uri here because this tarball is
+ ;; located under "data/annotation/" instead of "bioc/".
+ (uri (string-append "http://www.bioconductor.org/packages/"
+ "release/data/annotation/src/contrib/"
+ "BSgenome.Hsapiens.1000genomes.hs37d5_"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
+ (properties
+ `((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
+ (build-system r-build-system)
+ ;; As this package provides little more than a very large data file it
+ ;; doesn't make sense to build substitutes.
+ (arguments `(#:substitutable? #f))
+ (propagated-inputs
+ `(("r-bsgenome" ,r-bsgenome)))
+ (home-page
+ "http://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
+ (synopsis "Full genome sequences for Homo sapiens")
+ (description
+ "This package provides full genome sequences for Homo sapiens from
+1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
+ (license license:artistic2.0)))
+
(define-public r-impute
(package
(name "r-impute")
@@ -7630,6 +7770,71 @@ for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
(license license:gpl2+)))
+(define-public r-tximport
+ (package
+ (name "r-tximport")
+ (version "1.2.0")
+ (source (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "tximport" version))
+ (sha256
+ (base32
+ "1k5a7dad6zqg936s17f6cmwgqp11x24z9zhxndsgwbscgpyhpcb0"))))
+ (build-system r-build-system)
+ (home-page "http://bioconductor.org/packages/tximport")
+ (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
+ (description
+ "This package provides tools to import transcript-level abundance,
+estimated counts and transcript lengths, and to summarize them into matrices
+for use with downstream gene-level analysis packages. Average transcript
+length, weighted by sample-specific transcript abundance estimates, is
+provided as a matrix which can be used as an offset for different expression
+of gene-level counts.")
+ (license license:gpl2+)))
+
+(define-public r-rhdf5
+ (package
+ (name "r-rhdf5")
+ (version "2.18.0")
+ (source (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "rhdf5" version))
+ (sha256
+ (base32
+ "0pb04li55ysag30s7rap7nnivc0rqmgsmpj43kin0rxdabfn1w0k"))))
+ (build-system r-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'unpack-smallhdf5
+ (lambda* (#:key outputs #:allow-other-keys)
+ (system* "tar" "-xzvf"
+ "src/hdf5source/hdf5small.tgz" "-C" "src/" )
+ (substitute* "src/Makevars"
+ (("^.*cd hdf5source &&.*$") "")
+ (("^.*gunzip -dc hdf5small.tgz.*$") "")
+ (("^.*rm -rf hdf5.*$") "")
+ (("^.*mv hdf5source/hdf5 ..*$") ""))
+ (substitute* "src/hdf5/configure"
+ (("/bin/mv") "mv"))
+ #t)))))
+ (propagated-inputs
+ `(("r-zlibbioc" ,r-zlibbioc)))
+ (inputs
+ `(("perl" ,perl)
+ ("zlib" ,zlib)))
+ (home-page "http://bioconductor.org/packages/rhdf5")
+ (synopsis "HDF5 interface to R")
+ (description
+ "This R/Bioconductor package provides an interface between HDF5 and R.
+HDF5's main features are the ability to store and access very large and/or
+complex datasets and a wide variety of metadata on mass storage (disk) through
+a completely portable file format. The rhdf5 package is thus suited for the
+exchange of large and/or complex datasets between R and other software
+package, and for letting R applications work on datasets that are larger than
+the available RAM.")
+ (license license:artistic2.0)))
+
(define-public emboss
(package
(name "emboss")
@@ -7885,7 +8090,9 @@ replacement for strverscmp.")
("python-pyyaml" ,python-pyyaml)
("python-click" ,python-click)
("python-matplotlib" ,python-matplotlib)
- ("python-numpy" ,python-numpy)))
+ ("python-numpy" ,python-numpy)
+ ;; MultQC checks for the presence of nose at runtime.
+ ("python-nose" ,python-nose)))
(home-page "http://multiqc.info")
(synopsis "Aggregate bioinformatics analysis reports")
(description
@@ -7984,3 +8191,382 @@ immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
(license license:gpl2)))
+
+(define-public r-sva
+ (package
+ (name "r-sva")
+ (version "3.22.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "sva" version))
+ (sha256
+ (base32
+ "1wc1fjm6dzlsqqagm43y57w8jh8nsh0r0m8z1p6ximcb5gxqh7hn"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-genefilter" ,r-genefilter)))
+ (home-page "http://bioconductor.org/packages/sva")
+ (synopsis "Surrogate variable analysis")
+ (description
+ "This package contains functions for removing batch effects and other
+unwanted variation in high-throughput experiment. It also contains functions
+for identifying and building surrogate variables for high-dimensional data
+sets. Surrogate variables are covariates constructed directly from
+high-dimensional data like gene expression/RNA sequencing/methylation/brain
+imaging data that can be used in subsequent analyses to adjust for unknown,
+unmodeled, or latent sources of noise.")
+ (license license:artistic2.0)))
+
+(define-public r-seqminer
+ (package
+ (name "r-seqminer")
+ (version "5.3")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (cran-uri "seqminer" version))
+ (sha256
+ (base32
+ "0y0gc5lws3hdxasjb84m532ics6imb7qg9sl1zy62h503jh4j9gw"))))
+ (build-system r-build-system)
+ (inputs
+ `(("zlib" ,zlib)))
+ (home-page "http://seqminer.genomic.codes")
+ (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
+ (description
+ "This package provides tools to integrate nucleotide sequencing
+data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
+ ;; Any version of the GPL is acceptable
+ (license (list license:gpl2+ license:gpl3+))))
+
+(define-public r-raremetals2
+ (package
+ (name "r-raremetals2")
+ (version "0.1")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append "http://genome.sph.umich.edu/w/images/"
+ "b/b7/RareMETALS2_" version ".tar.gz"))
+ (sha256
+ (base32
+ "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
+ (properties `((upstream-name . "RareMETALS2")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-seqminer" ,r-seqminer)
+ ("r-mvtnorm" ,r-mvtnorm)
+ ("r-compquadform" ,r-compquadform)
+ ("r-getopt" ,r-getopt)))
+ (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
+ (synopsis "Analyze gene-level association tests for binary trait")
+ (description
+ "The R package rareMETALS2 is an extension of the R package rareMETALS.
+It was designed to meta-analyze gene-level association tests for binary trait.
+While rareMETALS offers a near-complete solution for meta-analysis of
+gene-level tests for quantitative trait, it does not offer the optimal
+solution for binary trait. The package rareMETALS2 offers improved features
+for analyzing gene-level association tests in meta-analyses for binary
+trait.")
+ (license license:gpl3)))
+
+(define-public r-maldiquant
+ (package
+ (name "r-maldiquant")
+ (version "1.16")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (cran-uri "MALDIquant" version))
+ (sha256
+ (base32
+ "067xbmy10mpsvmv77g62chd7wwhdhcfn5hmp5fisbnz2h5rq0q60"))))
+ (properties `((upstream-name . "MALDIquant")))
+ (build-system r-build-system)
+ (home-page "http://cran.r-project.org/web/packages/MALDIquant")
+ (synopsis "Quantitative analysis of mass spectrometry data")
+ (description
+ "This package provides a complete analysis pipeline for matrix-assisted
+laser desorption/ionization-time-of-flight (MALDI-TOF) and other
+two-dimensional mass spectrometry data. In addition to commonly used plotting
+and processing methods it includes distinctive features, namely baseline
+subtraction methods such as morphological filters (TopHat) or the
+statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
+alignment using warping functions, handling of replicated measurements as well
+as allowing spectra with different resolutions.")
+ (license license:gpl3+)))
+
+(define-public r-protgenerics
+ (package
+ (name "r-protgenerics")
+ (version "1.6.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "ProtGenerics" version))
+ (sha256
+ (base32
+ "0hb3vrrvfx6lcfalmjxm8dmigfmi5nba0pzjfgsrzd35c8mbfc6f"))))
+ (properties `((upstream-name . "ProtGenerics")))
+ (build-system r-build-system)
+ (home-page "https://github.com/lgatto/ProtGenerics")
+ (synopsis "S4 generic functions for proteomics infrastructure")
+ (description
+ "This package provides S4 generic functions needed by Bioconductor
+proteomics packages.")
+ (license license:artistic2.0)))
+
+(define-public r-mzr
+ (package
+ (name "r-mzr")
+ (version "2.8.1")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "mzR" version))
+ (sha256
+ (base32
+ "0ipmhg6l3pf648rdx5g2ha7l5ppd3cja6afxhdw76x8ga3633x0r"))))
+ (properties `((upstream-name . "mzR")))
+ (build-system r-build-system)
+ (inputs
+ `(("netcdf" ,netcdf)))
+ (propagated-inputs
+ `(("r-biobase" ,r-biobase)
+ ("r-biocgenerics" ,r-biocgenerics)
+ ("r-protgenerics" ,r-protgenerics)
+ ("r-rcpp" ,r-rcpp)
+ ("r-zlibbioc" ,r-zlibbioc)))
+ (home-page "https://github.com/sneumann/mzR/")
+ (synopsis "Parser for mass spectrometry data files")
+ (description
+ "The mzR package provides a unified API to the common file formats and
+parsers available for mass spectrometry data. It comes with a wrapper for the
+ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
+The package contains the original code written by the ISB, and a subset of the
+proteowizard library for mzML and mzIdentML. The netCDF reading code has
+previously been used in XCMS.")
+ (license license:artistic2.0)))
+
+(define-public r-affyio
+ (package
+ (name "r-affyio")
+ (version "1.44.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "affyio" version))
+ (sha256
+ (base32
+ "1svsl4mpk06xm505pap913x69ywks99262krag8y4ygpllj7dfyy"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-zlibbioc" ,r-zlibbioc)))
+ (inputs
+ `(("zlib" ,zlib)))
+ (home-page "https://github.com/bmbolstad/affyio")
+ (synopsis "Tools for parsing Affymetrix data files")
+ (description
+ "This package provides routines for parsing Affymetrix data files based
+upon file format information. The primary focus is on accessing the CEL and
+CDF file formats.")
+ (license license:lgpl2.0+)))
+
+(define-public r-affy
+ (package
+ (name "r-affy")
+ (version "1.52.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "affy" version))
+ (sha256
+ (base32
+ "1snq71ligf0wvaxa6zfrl13ydw0zfhspmhdyfk8q3ba3np4cz344"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-affyio" ,r-affyio)
+ ("r-biobase" ,r-biobase)
+ ("r-biocgenerics" ,r-biocgenerics)
+ ("r-biocinstaller" ,r-biocinstaller)
+ ("r-preprocesscore" ,r-preprocesscore)
+ ("r-zlibbioc" ,r-zlibbioc)))
+ (home-page "http://bioconductor.org/packages/affy")
+ (synopsis "Methods for affymetrix oligonucleotide arrays")
+ (description
+ "This package contains functions for exploratory oligonucleotide array
+analysis.")
+ (license license:lgpl2.0+)))
+
+(define-public r-vsn
+ (package
+ (name "r-vsn")
+ (version "3.42.3")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "vsn" version))
+ (sha256
+ (base32
+ "0mgl0azys2g90simf8wx6jdwd7gyg3m4pf12n6w6507jixm2cg97"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-affy" ,r-affy)
+ ("r-biobase" ,r-biobase)
+ ("r-ggplot2" ,r-ggplot2)
+ ("r-limma" ,r-limma)))
+ (home-page "http://bioconductor.org/packages/release/bioc/html/vsn.html")
+ (synopsis "Variance stabilization and calibration for microarray data")
+ (description
+ "The package implements a method for normalising microarray intensities,
+and works for single- and multiple-color arrays. It can also be used for data
+from other technologies, as long as they have similar format. The method uses
+a robust variant of the maximum-likelihood estimator for an
+additive-multiplicative error model and affine calibration. The model
+incorporates data calibration step (a.k.a. normalization), a model for the
+dependence of the variance on the mean intensity and a variance stabilizing
+data transformation. Differences between transformed intensities are
+analogous to \"normalized log-ratios\". However, in contrast to the latter,
+their variance is independent of the mean, and they are usually more sensitive
+and specific in detecting differential transcription.")
+ (license license:artistic2.0)))
+
+(define-public r-mzid
+ (package
+ (name "r-mzid")
+ (version "1.12.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "mzID" version))
+ (sha256
+ (base32
+ "1zn896cpfvqp1qmq5c4vcj933hb8rxwb6gkck1wqvr7393rpqy1q"))))
+ (properties `((upstream-name . "mzID")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-doparallel" ,r-doparallel)
+ ("r-foreach" ,r-foreach)
+ ("r-iterators" ,r-iterators)
+ ("r-plyr" ,r-plyr)
+ ("r-protgenerics" ,r-protgenerics)
+ ("r-rcpp" ,r-rcpp)
+ ("r-xml" ,r-xml)))
+ (home-page "http://bioconductor.org/packages/mzID")
+ (synopsis "Parser for mzIdentML files")
+ (description
+ "This package provides a parser for mzIdentML files implemented using the
+XML package. The parser tries to be general and able to handle all types of
+mzIdentML files with the drawback of having less pretty output than a vendor
+specific parser.")
+ (license license:gpl2+)))
+
+(define-public r-pcamethods
+ (package
+ (name "r-pcamethods")
+ (version "1.66.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "pcaMethods" version))
+ (sha256
+ (base32
+ "18mawhxw57pgpn87qha4mwki24gqja7wpqha8q496476vyap11xw"))))
+ (properties `((upstream-name . "pcaMethods")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-biobase" ,r-biobase)
+ ("r-biocgenerics" ,r-biocgenerics)
+ ("r-rcpp" ,r-rcpp)))
+ (home-page "https://github.com/hredestig/pcamethods")
+ (synopsis "Collection of PCA methods")
+ (description
+ "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
+Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
+for missing value estimation is included for comparison. BPCA, PPCA and
+NipalsPCA may be used to perform PCA on incomplete data as well as for
+accurate missing value estimation. A set of methods for printing and plotting
+the results is also provided. All PCA methods make use of the same data
+structure (pcaRes) to provide a common interface to the PCA results.")
+ (license license:gpl3+)))
+
+(define-public r-msnbase
+ (package
+ (name "r-msnbase")
+ (version "2.0.2")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "MSnbase" version))
+ (sha256
+ (base32
+ "0jjjs29dcwsjaxzfqxy98ycpg3rwxzzchkj77my3cjgdc00sm66n"))))
+ (properties `((upstream-name . "MSnbase")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-affy" ,r-affy)
+ ("r-biobase" ,r-biobase)
+ ("r-biocgenerics" ,r-biocgenerics)
+ ("r-biocparallel" ,r-biocparallel)
+ ("r-digest" ,r-digest)
+ ("r-ggplot2" ,r-ggplot2)
+ ("r-impute" ,r-impute)
+ ("r-iranges" ,r-iranges)
+ ("r-maldiquant" ,r-maldiquant)
+ ("r-mzid" ,r-mzid)
+ ("r-mzr" ,r-mzr)
+ ("r-pcamethods" ,r-pcamethods)
+ ("r-plyr" ,r-plyr)
+ ("r-preprocesscore" ,r-preprocesscore)
+ ("r-protgenerics" ,r-protgenerics)
+ ("r-rcpp" ,r-rcpp)
+ ("r-reshape2" ,r-reshape2)
+ ("r-s4vectors" ,r-s4vectors)
+ ("r-vsn" ,r-vsn)
+ ("r-xml" ,r-xml)))
+ (home-page "https://github.com/lgatto/MSnbase")
+ (synopsis "Base functions and classes for MS-based proteomics")
+ (description
+ "This package provides basic plotting, data manipulation and processing
+of mass spectrometry based proteomics data.")
+ (license license:artistic2.0)))
+
+(define-public r-msnid
+ (package
+ (name "r-msnid")
+ (version "1.8.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "MSnID" version))
+ (sha256
+ (base32
+ "0fkk3za39cxi0jyxmagmycjdslr2xf6vg3ylz14jyffqi0blw9d5"))))
+ (properties `((upstream-name . "MSnID")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-biobase" ,r-biobase)
+ ("r-data-table" ,r-data-table)
+ ("r-doparallel" ,r-doparallel)
+ ("r-dplyr" ,r-dplyr)
+ ("r-foreach" ,r-foreach)
+ ("r-iterators" ,r-iterators)
+ ("r-msnbase" ,r-msnbase)
+ ("r-mzid" ,r-mzid)
+ ("r-mzr" ,r-mzr)
+ ("r-protgenerics" ,r-protgenerics)
+ ("r-r-cache" ,r-r-cache)
+ ("r-rcpp" ,r-rcpp)
+ ("r-reshape2" ,r-reshape2)))
+ (home-page "http://bioconductor.org/packages/MSnID")
+ (synopsis "Utilities for LC-MSn proteomics identifications")
+ (description
+ "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
+from mzIdentML (leveraging the mzID package) or text files. After collating
+the search results from multiple datasets it assesses their identification
+quality and optimize filtering criteria to achieve the maximum number of
+identifications while not exceeding a specified false discovery rate. It also
+contains a number of utilities to explore the MS/MS results and assess missed
+and irregular enzymatic cleavages, mass measurement accuracy, etc.")
+ (license license:artistic2.0)))