diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 648 |
1 files changed, 617 insertions, 31 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index c930099179..5a1738b936 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -468,6 +468,47 @@ frames} (ORFs) using ribosome profiling (ribo-seq) data. This package provides the Ribotaper pipeline.") (license license:gpl3+))) +(define-public ribodiff + (package + (name "ribodiff") + (version "0.2.2") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/ratschlab/RiboDiff/" + "archive/v" version ".tar.gz")) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj")))) + (build-system python-build-system) + (arguments + `(#:python ,python-2 + #:phases + (modify-phases %standard-phases + ;; Generate an installable executable script wrapper. + (add-after 'unpack 'patch-setup.py + (lambda _ + (substitute* "setup.py" + (("^(.*)packages=.*" line prefix) + (string-append line "\n" + prefix "scripts=['scripts/TE.py'],\n"))) + #t))))) + (inputs + `(("python-numpy" ,python2-numpy) + ("python-matplotlib" ,python2-matplotlib) + ("python-scipy" ,python2-scipy) + ("python-statsmodels" ,python2-statsmodels))) + (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/") + (synopsis "Detect translation efficiency changes from ribosome footprints") + (description "RiboDiff is a statistical tool that detects the protein +translational efficiency change from Ribo-Seq (ribosome footprinting) and +RNA-Seq data. It uses a generalized linear model to detect genes showing +difference in translational profile taking mRNA abundance into account. 
It +facilitates us to decipher the translational regulation that behave +independently with transcriptional regulation.") + (license license:gpl3+))) + (define-public bioawk (package (name "bioawk") @@ -2010,7 +2051,7 @@ identify enrichments with functional annotations of the genome.") (define-public diamond (package (name "diamond") - (version "0.8.31") + (version "0.8.34") (source (origin (method url-fetch) (uri (string-append @@ -2019,7 +2060,7 @@ identify enrichments with functional annotations of the genome.") (file-name (string-append name "-" version ".tar.gz")) (sha256 (base32 - "0nh79f4rpgq8vmlga743r7vd0z0ik6spy34f7vfq0v9lcmvfr7xq")))) + "0jvr34g346gbz7z1zb9bs0vplivm9p4cxk0lbzklvdpa7g236p39")))) (build-system cmake-build-system) (arguments '(#:tests? #f ; no "check" target @@ -2726,6 +2767,69 @@ several alignment strategies enable effective alignment of RNA-seq reads, in particular, reads spanning multiple exons.") (license license:gpl3+))) +(define-public hisat2 + (package + (name "hisat2") + (version "2.0.5") + (source + (origin + (method url-fetch) + ;; FIXME: a better source URL is + ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2" + ;; "/downloads/hisat2-" version "-source.zip") + ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g" + ;; but it is currently unavailable. + (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz") + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50")))) + (build-system gnu-build-system) + (arguments + `(#:tests? 
#f ; no check target + #:make-flags (list "CC=gcc" "CXX=g++" "allall") + #:modules ((guix build gnu-build-system) + (guix build utils) + (srfi srfi-26)) + #:phases + (modify-phases %standard-phases + (add-after 'unpack 'make-deterministic + (lambda _ + (substitute* "Makefile" + (("`date`") "0")) + #t)) + (delete 'configure) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (bin (string-append out "/bin/")) + (doc (string-append out "/share/doc/hisat2/"))) + (for-each + (cut install-file <> bin) + (find-files "." + "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")) + (mkdir-p doc) + (install-file "doc/manual.inc.html" doc)) + #t))))) + (native-inputs + `(("unzip" ,unzip) ; needed for archive from ftp + ("perl" ,perl) + ("pandoc" ,ghc-pandoc))) ; for documentation + (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml") + (synopsis "Graph-based alignment of genomic sequencing reads") + (description "HISAT2 is a fast and sensitive alignment program for mapping +next-generation sequencing reads (both DNA and RNA) to a population of human +genomes (as well as to a single reference genome). In addition to using one +global @dfn{graph FM} (GFM) index that represents a population of human +genomes, HISAT2 uses a large set of small GFM indexes that collectively cover +the whole genome. These small indexes, combined with several alignment +strategies, enable rapid and accurate alignment of sequencing reads. This new +indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).") + ;; HISAT2 contains files from Bowtie2, which is released under + ;; GPLv2 or later. The HISAT2 source files are released under + ;; GPLv3 or later. 
+ (license license:gpl3+))) + (define-public hmmer (package (name "hmmer") @@ -3383,7 +3487,6 @@ that a read originated from a particular isoform.") (version "3.8.1551") (source (origin (method url-fetch/tarbomb) - (file-name (string-append name "-" version)) (uri (string-append "http://www.drive5.com/muscle/muscle_src_" version ".tar.gz")) @@ -5148,17 +5251,41 @@ sequence.") (supported-systems '("i686-linux" "x86_64-linux")) (license license:bsd-3))) +(define-public r-centipede + (package + (name "r-centipede") + (version "1.2") + (source (origin + (method url-fetch) + (uri (string-append "http://download.r-forge.r-project.org/" + "src/contrib/CENTIPEDE_" version ".tar.gz")) + (sha256 + (base32 + "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9")))) + (build-system r-build-system) + (home-page "http://centipede.uchicago.edu/") + (synopsis "Predict transcription factor binding sites") + (description + "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions +of the genome that are bound by particular transcription factors. It starts +by identifying a set of candidate binding sites, and then aims to classify the +sites according to whether each site is bound or not bound by a transcription +factor. CENTIPEDE is an unsupervised learning algorithm that discriminates +between two different types of motif instances using as much relevant +information as possible.") + (license (list license:gpl2+ license:gpl3+)))) + (define-public r-vegan (package (name "r-vegan") - (version "2.4-1") + (version "2.4-2") (source (origin (method url-fetch) (uri (cran-uri "vegan" version)) (sha256 (base32 - "0i0c7rc0nzgbysd1nlxzxd2rvy75qcnw3yc7nggzqjzzj5d7yzsd")))) + "12wf64izrpq9z3ix7mgm5421mq0xsm8dw5qblvcrz452nfhjf5w9")))) (build-system r-build-system) (arguments `(#:phases @@ -5610,7 +5737,7 @@ track. 
The database is exposed as a @code{TxDb} object.") (define-public vsearch (package (name "vsearch") - (version "2.3.4") + (version "2.4.0") (source (origin (method url-fetch) @@ -5620,31 +5747,12 @@ track. The database is exposed as a @code{TxDb} object.") (file-name (string-append name "-" version ".tar.gz")) (sha256 (base32 - "1xyraxmhyx62mxx8z7c8waygvcijwkh48ms1ar60w2cv2y2sn4al")) - (modules '((guix build utils))) + "007q9a50hdw4vs2iajabvbw7qccml4r8cbqzyi5ipkkf42jk3vnr")) + (patches (search-patches "vsearch-unbundle-cityhash.patch")) (snippet '(begin - ;; Remove bundled cityhash and '-mtune=native'. - (substitute* "src/Makefile.am" - (("^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash \ --O3 -mtune=native -Wall -Wsign-compare") - (string-append "AM_CXXFLAGS=-lcityhash" - " -O3 -Wall -Wsign-compare")) - (("^__top_builddir__bin_vsearch_SOURCES = city.h \\\\") - "__top_builddir__bin_vsearch_SOURCES = \\") - (("^city.h \\\\") "\\") - (("^citycrc.h \\\\") "\\") - (("^libcityhash_a.*") "") - (("noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a \ -libcityhash.a") - "noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a") - (("__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \ -libcpu_sse2.a libcityhash.a") - "__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \ -libcpu_sse2.a -lcityhash")) - (substitute* "src/vsearch.h" - (("^\\#include \"city.h\"") "#include <city.h>") - (("^\\#include \"citycrc.h\"") "#include <citycrc.h>")) + ;; Remove bundled cityhash sources. The vsearch source is adjusted + ;; for this in the patch. 
(delete-file "src/city.h") (delete-file "src/citycrc.h") (delete-file "src/city.cc") @@ -6186,7 +6294,8 @@ names in their natural, rather than lexicographic, order.") (build-system r-build-system) (propagated-inputs `(("r-limma" ,r-limma) - ("r-locfit" ,r-locfit))) + ("r-locfit" ,r-locfit) + ("r-statmod" ,r-statmod))) ;for estimateDisp (home-page "http://bioinf.wehi.edu.au/edgeR") (synopsis "EdgeR does empirical analysis of digital gene expression data") (description "This package can do differential expression analysis of @@ -6746,6 +6855,37 @@ dependencies between GO terms can be implemented and applied.") genome data packages and support for efficient SNP representation.") (license license:artistic2.0))) +(define-public r-bsgenome-hsapiens-1000genomes-hs37d5 + (package + (name "r-bsgenome-hsapiens-1000genomes-hs37d5") + (version "0.99.1") + (source (origin + (method url-fetch) + ;; We cannot use bioconductor-uri here because this tarball is + ;; located under "data/annotation/" instead of "bioc/". + (uri (string-append "http://www.bioconductor.org/packages/" + "release/data/annotation/src/contrib/" + "BSgenome.Hsapiens.1000genomes.hs37d5_" + version ".tar.gz")) + (sha256 + (base32 + "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr")))) + (properties + `((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5"))) + (build-system r-build-system) + ;; As this package provides little more than a very large data file it + ;; doesn't make sense to build substitutes. + (arguments `(#:substitutable? 
#f)) + (propagated-inputs + `(("r-bsgenome" ,r-bsgenome))) + (home-page + "http://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/") + (synopsis "Full genome sequences for Homo sapiens") + (description + "This package provides full genome sequences for Homo sapiens from +1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.") + (license license:artistic2.0))) + (define-public r-impute (package (name "r-impute") @@ -7630,6 +7770,71 @@ for DNA and protein sequences. This package supports several sequence kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.") (license license:gpl2+))) +(define-public r-tximport + (package + (name "r-tximport") + (version "1.2.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "tximport" version)) + (sha256 + (base32 + "1k5a7dad6zqg936s17f6cmwgqp11x24z9zhxndsgwbscgpyhpcb0")))) + (build-system r-build-system) + (home-page "http://bioconductor.org/packages/tximport") + (synopsis "Import and summarize transcript-level estimates for gene-level analysis") + (description + "This package provides tools to import transcript-level abundance, +estimated counts and transcript lengths, and to summarize them into matrices +for use with downstream gene-level analysis packages. 
Average transcript +length, weighted by sample-specific transcript abundance estimates, is +provided as a matrix which can be used as an offset for different expression +of gene-level counts.") + (license license:gpl2+))) + +(define-public r-rhdf5 + (package + (name "r-rhdf5") + (version "2.18.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "rhdf5" version)) + (sha256 + (base32 + "0pb04li55ysag30s7rap7nnivc0rqmgsmpj43kin0rxdabfn1w0k")))) + (build-system r-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'unpack-smallhdf5 + (lambda* (#:key outputs #:allow-other-keys) + (system* "tar" "-xzvf" + "src/hdf5source/hdf5small.tgz" "-C" "src/" ) + (substitute* "src/Makevars" + (("^.*cd hdf5source &&.*$") "") + (("^.*gunzip -dc hdf5small.tgz.*$") "") + (("^.*rm -rf hdf5.*$") "") + (("^.*mv hdf5source/hdf5 ..*$") "")) + (substitute* "src/hdf5/configure" + (("/bin/mv") "mv")) + #t))))) + (propagated-inputs + `(("r-zlibbioc" ,r-zlibbioc))) + (inputs + `(("perl" ,perl) + ("zlib" ,zlib))) + (home-page "http://bioconductor.org/packages/rhdf5") + (synopsis "HDF5 interface to R") + (description + "This R/Bioconductor package provides an interface between HDF5 and R. +HDF5's main features are the ability to store and access very large and/or +complex datasets and a wide variety of metadata on mass storage (disk) through +a completely portable file format. 
The rhdf5 package is thus suited for the +exchange of large and/or complex datasets between R and other software +package, and for letting R applications work on datasets that are larger than +the available RAM.") + (license license:artistic2.0))) + (define-public emboss (package (name "emboss") @@ -7885,7 +8090,9 @@ replacement for strverscmp.") ("python-pyyaml" ,python-pyyaml) ("python-click" ,python-click) ("python-matplotlib" ,python-matplotlib) - ("python-numpy" ,python-numpy))) + ("python-numpy" ,python-numpy) + ;; MultiQC checks for the presence of nose at runtime. + ("python-nose" ,python-nose))) (home-page "http://multiqc.info") (synopsis "Aggregate bioinformatics analysis reports") (description @@ -7984,3 +8191,382 @@ immunoprecipitation and target enrichment on small gene panels. Thereby, CopywriteR constitutes a widely applicable alternative to available copy number detection tools.") (license license:gpl2))) + +(define-public r-sva + (package + (name "r-sva") + (version "3.22.0") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "sva" version)) + (sha256 + (base32 + "1wc1fjm6dzlsqqagm43y57w8jh8nsh0r0m8z1p6ximcb5gxqh7hn")))) + (build-system r-build-system) + (propagated-inputs + `(("r-genefilter" ,r-genefilter))) + (home-page "http://bioconductor.org/packages/sva") + (synopsis "Surrogate variable analysis") + (description + "This package contains functions for removing batch effects and other +unwanted variation in high-throughput experiment. It also contains functions +for identifying and building surrogate variables for high-dimensional data +sets.
Surrogate variables are covariates constructed directly from +high-dimensional data like gene expression/RNA sequencing/methylation/brain +imaging data that can be used in subsequent analyses to adjust for unknown, +unmodeled, or latent sources of noise.") + (license license:artistic2.0))) + +(define-public r-seqminer + (package + (name "r-seqminer") + (version "5.3") + (source + (origin + (method url-fetch) + (uri (cran-uri "seqminer" version)) + (sha256 + (base32 + "0y0gc5lws3hdxasjb84m532ics6imb7qg9sl1zy62h503jh4j9gw")))) + (build-system r-build-system) + (inputs + `(("zlib" ,zlib))) + (home-page "http://seqminer.genomic.codes") + (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)") + (description + "This package provides tools to integrate nucleotide sequencing +data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.") + ;; Any version of the GPL is acceptable + (license (list license:gpl2+ license:gpl3+)))) + +(define-public r-raremetals2 + (package + (name "r-raremetals2") + (version "0.1") + (source + (origin + (method url-fetch) + (uri (string-append "http://genome.sph.umich.edu/w/images/" + "b/b7/RareMETALS2_" version ".tar.gz")) + (sha256 + (base32 + "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s")))) + (properties `((upstream-name . "RareMETALS2"))) + (build-system r-build-system) + (propagated-inputs + `(("r-seqminer" ,r-seqminer) + ("r-mvtnorm" ,r-mvtnorm) + ("r-compquadform" ,r-compquadform) + ("r-getopt" ,r-getopt))) + (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2") + (synopsis "Analyze gene-level association tests for binary trait") + (description + "The R package rareMETALS2 is an extension of the R package rareMETALS. +It was designed to meta-analyze gene-level association tests for binary trait. +While rareMETALS offers a near-complete solution for meta-analysis of +gene-level tests for quantitative trait, it does not offer the optimal +solution for binary trait. 
The package rareMETALS2 offers improved features +for analyzing gene-level association tests in meta-analyses for binary +trait.") + (license license:gpl3))) + +(define-public r-maldiquant + (package + (name "r-maldiquant") + (version "1.16") + (source + (origin + (method url-fetch) + (uri (cran-uri "MALDIquant" version)) + (sha256 + (base32 + "067xbmy10mpsvmv77g62chd7wwhdhcfn5hmp5fisbnz2h5rq0q60")))) + (properties `((upstream-name . "MALDIquant"))) + (build-system r-build-system) + (home-page "http://cran.r-project.org/web/packages/MALDIquant") + (synopsis "Quantitative analysis of mass spectrometry data") + (description + "This package provides a complete analysis pipeline for matrix-assisted +laser desorption/ionization-time-of-flight (MALDI-TOF) and other +two-dimensional mass spectrometry data. In addition to commonly used plotting +and processing methods it includes distinctive features, namely baseline +subtraction methods such as morphological filters (TopHat) or the +statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak +alignment using warping functions, handling of replicated measurements as well +as allowing spectra with different resolutions.") + (license license:gpl3+))) + +(define-public r-protgenerics + (package + (name "r-protgenerics") + (version "1.6.0") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "ProtGenerics" version)) + (sha256 + (base32 + "0hb3vrrvfx6lcfalmjxm8dmigfmi5nba0pzjfgsrzd35c8mbfc6f")))) + (properties `((upstream-name . 
"ProtGenerics"))) + (build-system r-build-system) + (home-page "https://github.com/lgatto/ProtGenerics") + (synopsis "S4 generic functions for proteomics infrastructure") + (description + "This package provides S4 generic functions needed by Bioconductor +proteomics packages.") + (license license:artistic2.0))) + +(define-public r-mzr + (package + (name "r-mzr") + (version "2.8.1") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "mzR" version)) + (sha256 + (base32 + "0ipmhg6l3pf648rdx5g2ha7l5ppd3cja6afxhdw76x8ga3633x0r")))) + (properties `((upstream-name . "mzR"))) + (build-system r-build-system) + (inputs + `(("netcdf" ,netcdf))) + (propagated-inputs + `(("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-protgenerics" ,r-protgenerics) + ("r-rcpp" ,r-rcpp) + ("r-zlibbioc" ,r-zlibbioc))) + (home-page "https://github.com/sneumann/mzR/") + (synopsis "Parser for mass spectrometry data files") + (description + "The mzR package provides a unified API to the common file formats and +parsers available for mass spectrometry data. It comes with a wrapper for the +ISB random access parser for mass spectrometry mzXML, mzData and mzML files. +The package contains the original code written by the ISB, and a subset of the +proteowizard library for mzML and mzIdentML. The netCDF reading code has +previously been used in XCMS.") + (license license:artistic2.0))) + +(define-public r-affyio + (package + (name "r-affyio") + (version "1.44.0") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "affyio" version)) + (sha256 + (base32 + "1svsl4mpk06xm505pap913x69ywks99262krag8y4ygpllj7dfyy")))) + (build-system r-build-system) + (propagated-inputs + `(("r-zlibbioc" ,r-zlibbioc))) + (inputs + `(("zlib" ,zlib))) + (home-page "https://github.com/bmbolstad/affyio") + (synopsis "Tools for parsing Affymetrix data files") + (description + "This package provides routines for parsing Affymetrix data files based +upon file format information. 
The primary focus is on accessing the CEL and +CDF file formats.") + (license license:lgpl2.0+))) + +(define-public r-affy + (package + (name "r-affy") + (version "1.52.0") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "affy" version)) + (sha256 + (base32 + "1snq71ligf0wvaxa6zfrl13ydw0zfhspmhdyfk8q3ba3np4cz344")))) + (build-system r-build-system) + (propagated-inputs + `(("r-affyio" ,r-affyio) + ("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-biocinstaller" ,r-biocinstaller) + ("r-preprocesscore" ,r-preprocesscore) + ("r-zlibbioc" ,r-zlibbioc))) + (home-page "http://bioconductor.org/packages/affy") + (synopsis "Methods for affymetrix oligonucleotide arrays") + (description + "This package contains functions for exploratory oligonucleotide array +analysis.") + (license license:lgpl2.0+))) + +(define-public r-vsn + (package + (name "r-vsn") + (version "3.42.3") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "vsn" version)) + (sha256 + (base32 + "0mgl0azys2g90simf8wx6jdwd7gyg3m4pf12n6w6507jixm2cg97")))) + (build-system r-build-system) + (propagated-inputs + `(("r-affy" ,r-affy) + ("r-biobase" ,r-biobase) + ("r-ggplot2" ,r-ggplot2) + ("r-limma" ,r-limma))) + (home-page "http://bioconductor.org/packages/release/bioc/html/vsn.html") + (synopsis "Variance stabilization and calibration for microarray data") + (description + "The package implements a method for normalising microarray intensities, +and works for single- and multiple-color arrays. It can also be used for data +from other technologies, as long as they have similar format. The method uses +a robust variant of the maximum-likelihood estimator for an +additive-multiplicative error model and affine calibration. The model +incorporates data calibration step (a.k.a. normalization), a model for the +dependence of the variance on the mean intensity and a variance stabilizing +data transformation. 
Differences between transformed intensities are +analogous to \"normalized log-ratios\". However, in contrast to the latter, +their variance is independent of the mean, and they are usually more sensitive +and specific in detecting differential transcription.") + (license license:artistic2.0))) + +(define-public r-mzid + (package + (name "r-mzid") + (version "1.12.0") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "mzID" version)) + (sha256 + (base32 + "1zn896cpfvqp1qmq5c4vcj933hb8rxwb6gkck1wqvr7393rpqy1q")))) + (properties `((upstream-name . "mzID"))) + (build-system r-build-system) + (propagated-inputs + `(("r-doparallel" ,r-doparallel) + ("r-foreach" ,r-foreach) + ("r-iterators" ,r-iterators) + ("r-plyr" ,r-plyr) + ("r-protgenerics" ,r-protgenerics) + ("r-rcpp" ,r-rcpp) + ("r-xml" ,r-xml))) + (home-page "http://bioconductor.org/packages/mzID") + (synopsis "Parser for mzIdentML files") + (description + "This package provides a parser for mzIdentML files implemented using the +XML package. The parser tries to be general and able to handle all types of +mzIdentML files with the drawback of having less pretty output than a vendor +specific parser.") + (license license:gpl2+))) + +(define-public r-pcamethods + (package + (name "r-pcamethods") + (version "1.66.0") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "pcaMethods" version)) + (sha256 + (base32 + "18mawhxw57pgpn87qha4mwki24gqja7wpqha8q496476vyap11xw")))) + (properties `((upstream-name . "pcaMethods"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-rcpp" ,r-rcpp))) + (home-page "https://github.com/hredestig/pcamethods") + (synopsis "Collection of PCA methods") + (description + "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA, +Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method +for missing value estimation is included for comparison. 
BPCA, PPCA and +NipalsPCA may be used to perform PCA on incomplete data as well as for +accurate missing value estimation. A set of methods for printing and plotting +the results is also provided. All PCA methods make use of the same data +structure (pcaRes) to provide a common interface to the PCA results.") + (license license:gpl3+))) + +(define-public r-msnbase + (package + (name "r-msnbase") + (version "2.0.2") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "MSnbase" version)) + (sha256 + (base32 + "0jjjs29dcwsjaxzfqxy98ycpg3rwxzzchkj77my3cjgdc00sm66n")))) + (properties `((upstream-name . "MSnbase"))) + (build-system r-build-system) + (propagated-inputs + `(("r-affy" ,r-affy) + ("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-biocparallel" ,r-biocparallel) + ("r-digest" ,r-digest) + ("r-ggplot2" ,r-ggplot2) + ("r-impute" ,r-impute) + ("r-iranges" ,r-iranges) + ("r-maldiquant" ,r-maldiquant) + ("r-mzid" ,r-mzid) + ("r-mzr" ,r-mzr) + ("r-pcamethods" ,r-pcamethods) + ("r-plyr" ,r-plyr) + ("r-preprocesscore" ,r-preprocesscore) + ("r-protgenerics" ,r-protgenerics) + ("r-rcpp" ,r-rcpp) + ("r-reshape2" ,r-reshape2) + ("r-s4vectors" ,r-s4vectors) + ("r-vsn" ,r-vsn) + ("r-xml" ,r-xml))) + (home-page "https://github.com/lgatto/MSnbase") + (synopsis "Base functions and classes for MS-based proteomics") + (description + "This package provides basic plotting, data manipulation and processing +of mass spectrometry based proteomics data.") + (license license:artistic2.0))) + +(define-public r-msnid + (package + (name "r-msnid") + (version "1.8.0") + (source + (origin + (method url-fetch) + (uri (bioconductor-uri "MSnID" version)) + (sha256 + (base32 + "0fkk3za39cxi0jyxmagmycjdslr2xf6vg3ylz14jyffqi0blw9d5")))) + (properties `((upstream-name . 
"MSnID"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biobase" ,r-biobase) + ("r-data-table" ,r-data-table) + ("r-doparallel" ,r-doparallel) + ("r-dplyr" ,r-dplyr) + ("r-foreach" ,r-foreach) + ("r-iterators" ,r-iterators) + ("r-msnbase" ,r-msnbase) + ("r-mzid" ,r-mzid) + ("r-mzr" ,r-mzr) + ("r-protgenerics" ,r-protgenerics) + ("r-r-cache" ,r-r-cache) + ("r-rcpp" ,r-rcpp) + ("r-reshape2" ,r-reshape2))) + (home-page "http://bioconductor.org/packages/MSnID") + (synopsis "Utilities for LC-MSn proteomics identifications") + (description + "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data +from mzIdentML (leveraging the mzID package) or text files. After collating +the search results from multiple datasets it assesses their identification +quality and optimize filtering criteria to achieve the maximum number of +identifications while not exceeding a specified false discovery rate. It also +contains a number of utilities to explore the MS/MS results and assess missed +and irregular enzymatic cleavages, mass measurement accuracy, etc.") + (license license:artistic2.0))) |