diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 692 |
1 files changed, 635 insertions, 57 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 76a1c17737..c6531d669f 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -1,6 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net> -;;; Copyright © 2015 Ben Woodcroft <donttrustben@gmail.com> +;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com> ;;; Copyright © 2015 Pjotr Prins <pjotr.guix@thebird.nl> ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr> ;;; @@ -41,12 +41,14 @@ #:use-module (gnu packages cpio) #:use-module (gnu packages file) #:use-module (gnu packages gawk) + #:use-module (gnu packages gcc) #:use-module (gnu packages java) #:use-module (gnu packages linux) #:use-module (gnu packages machine-learning) #:use-module (gnu packages maths) #:use-module (gnu packages mpi) #:use-module (gnu packages ncurses) + #:use-module (gnu packages pcre) #:use-module (gnu packages perl) #:use-module (gnu packages pkg-config) #:use-module (gnu packages popt) @@ -56,6 +58,7 @@ #:use-module (gnu packages statistics) #:use-module (gnu packages tbb) #:use-module (gnu packages textutils) + #:use-module (gnu packages time) #:use-module (gnu packages tls) #:use-module (gnu packages vim) #:use-module (gnu packages web) @@ -205,7 +208,7 @@ computational cluster.") (define-public bedtools (package (name "bedtools") - (version "2.24.0") + (version "2.25.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/arq5x/bedtools2/archive/v" @@ -213,8 +216,7 @@ computational cluster.") (file-name (string-append name "-" version ".tar.gz")) (sha256 (base32 - "0lnxrjvs3nnmb4bmskag1wg3h2hd80przz5q3xd0bvs7vyxrvpbl")) - (patches (list (search-patch "bedtools-32bit-compilation.patch"))))) + "1ywcy3yfwzhl905b51l0ffjia55h75vv3mw5xkvib04pp6pj548m")))) (build-system gnu-build-system) (native-inputs `(("python" ,python-2))) (inputs `(("samtools" ,samtools) @@ -222,25 +224,15 @@ computational cluster.") (arguments '(#:test-target "test" #:phases - (alist-cons-after - 'unpack 'patch-makefile-SHELL-definition - (lambda _ - ;; patch-makefile-SHELL cannot be used here as it does not - ;; yet patch definitions with `:='. Since changes to - ;; patch-makefile-SHELL result in a full rebuild, features - ;; of patch-makefile-SHELL are reimplemented here. - (substitute* "Makefile" - (("^SHELL := .*$") (string-append "SHELL := " (which "bash") " -e \n")))) - (alist-delete - 'configure - (alist-replace - 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin/"))) - (for-each (lambda (file) - (install-file file bin)) - (find-files "bin" ".*")))) - %standard-phases))))) + (modify-phases %standard-phases + (delete 'configure) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((bin (string-append (assoc-ref outputs "out") "/bin/"))) + (for-each (lambda (file) + (install-file file bin)) + (find-files "bin" ".*"))) + #t))))) (home-page "https://github.com/arq5x/bedtools2") (synopsis "Tools for genome analysis and arithmetic") (description @@ -1064,14 +1056,14 @@ data and settings.") (define-public edirect (package (name "edirect") - (version "2.50") + (version "3.50") (source (origin (method url-fetch) ;; Note: older versions are not retained. - (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.zip") + (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.tar.gz") (sha256 (base32 - "08afhz2ph66h8h381hl1mqyxkdi5nbvzsyj9gfw3jfbdijnpi4qj")))) + "1cr3gzcs3flmgnnbj5iz93vh9w0fca1ilzi2q82cl63ln3mwvpz0")))) (build-system perl-build-system) (arguments `(#:tests? #f ;no "check" target @@ -1111,8 +1103,6 @@ data and settings.") ("perl-uri" ,perl-uri) ("perl-www-robotrules" ,perl-www-robotrules) ("perl" ,perl))) - (native-inputs - `(("unzip" ,unzip))) (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/") (synopsis "Tools for accessing the NCBI's set of databases") (description @@ -1436,6 +1426,71 @@ genes in incomplete assemblies or complete genomes.") ;; GPL3+ according to private correspondense with the authors. (license license:gpl3+))) +(define-public fxtract + (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115")) + (package + (name "fxtract") + (version "2.3") + (source + (origin + (method url-fetch) + (uri (string-append + "https://github.com/ctSkennerton/fxtract/archive/" + version ".tar.gz")) + (file-name (string-append "ctstennerton-util-" + (string-take util-commit 7) + "-checkout")) + (sha256 + (base32 + "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj")))) + (build-system gnu-build-system) + (arguments + `(#:make-flags (list + (string-append "PREFIX=" (assoc-ref %outputs "out")) + "CC=gcc") + #:test-target "fxtract_test" + #:phases + (modify-phases %standard-phases + (delete 'configure) + (add-before 'build 'copy-util + (lambda* (#:key inputs #:allow-other-keys) + (rmdir "util") + (copy-recursively (assoc-ref inputs "ctskennerton-util") "util") + #t)) + ;; Do not use make install as this requires additional dependencies. + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (bin (string-append out"/bin"))) + (install-file "fxtract" bin) + #t)))))) + (inputs + `(("pcre" ,pcre) + ("zlib" ,zlib))) + (native-inputs + ;; ctskennerton-util is licensed under GPL2. + `(("ctskennerton-util" + ,(origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/ctSkennerton/util.git") + (commit util-commit))) + (file-name (string-append + "ctstennerton-util-" util-commit "-checkout")) + (sha256 + (base32 + "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7")))))) + (home-page "https://github.com/ctSkennerton/fxtract") + (synopsis "Extract sequences from FASTA and FASTQ files") + (description + "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA +or FASTQ) file given a subsequence. It uses a simple substring search for +basic tasks but can change to using POSIX regular expressions, PCRE, hash +lookups or multi-pattern searching as required. By default fxtract looks in +the sequence of each record but can also be told to look in the header, +comment or quality sections.") + (license license:expat)))) + (define-public grit (package (name "grit") @@ -1643,7 +1698,7 @@ from high-throughput sequencing assays.") '(configure install check))))) (native-inputs `(("ant" ,ant) - ("jdk" ,icedtea6 "jdk"))) + ("jdk" ,icedtea "jdk"))) (home-page "http://samtools.github.io/htsjdk/") (synopsis "Java API for high-throughput sequencing data (HTS) formats") (description @@ -1738,6 +1793,55 @@ to measure the reproducibility of findings identified from replicate experiments and provide highly stable thresholds based on reproducibility.") (license license:gpl3+))) +(define-public jellyfish + (package + (name "jellyfish") + (version "2.2.4") + (source (origin + (method url-fetch) + (uri (string-append "https://github.com/gmarcais/Jellyfish/" + "releases/download/v" version + "/jellyfish-" version ".tar.gz")) + (sha256 + (base32 + "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk")))) + (build-system gnu-build-system) + (outputs '("out" ;for library + "ruby" ;for Ruby bindings + "python")) ;for Python bindings + (arguments + `(#:configure-flags + (list (string-append "--enable-ruby-binding=" + (assoc-ref %outputs "ruby")) + (string-append "--enable-python-binding=" + (assoc-ref %outputs "python"))) + #:phases + (modify-phases %standard-phases + (add-before 'check 'set-SHELL-variable + (lambda _ + ;; generator_manager.hpp either uses /bin/sh or $SHELL + ;; to run tests. + (setenv "SHELL" (which "bash")) + #t))))) + (native-inputs + `(("bc" ,bc) + ("time" ,time) + ("ruby" ,ruby) + ("python" ,python-2))) + (synopsis "Tool for fast counting of k-mers in DNA") + (description + "Jellyfish is a tool for fast, memory-efficient counting of k-mers in +DNA. A k-mer is a substring of length k, and counting the occurrences of all +such substrings is a central step in many analyses of DNA sequence. Jellyfish +is a command-line program that reads FASTA and multi-FASTA files containing +DNA sequences. It outputs its k-mer counts in a binary format, which can be +translated into a human-readable text format using the @code{jellyfish dump} +command, or queried for specific k-mers with @code{jellyfish query}.") + (home-page "http://www.genome.umd.edu/jellyfish.html") + ;; The combined work is published under the GPLv3 or later. Individual + ;; files such as lib/jsoncpp.cpp are released under the Expat license. + (license (list license:gpl3+ license:expat)))) + (define-public macs (package (name "macs") @@ -2284,7 +2388,7 @@ distribution, coverage uniformity, strand specificity, etc.") (define-public samtools (package (name "samtools") - (version "1.2") + (version "1.3") (source (origin (method url-fetch) @@ -2293,38 +2397,23 @@ distribution, coverage uniformity, strand specificity, etc.") version "/samtools-" version ".tar.bz2")) (sha256 (base32 - "1akdqb685pk9xk1nb6sa9aq8xssjjhvvc06kp4cpdqvz2157l3j2")))) + "03mnf0mhbfwhqlqfslrhfnw68s3g0fs1as354i9a584mqw1l1smy")))) (build-system gnu-build-system) (arguments - `(;; There are 87 test failures when building on non-64-bit architectures - ;; due to invalid test data. This has since been fixed upstream (see - ;; <https://github.com/samtools/samtools/pull/307>), but as there has - ;; not been a new release we disable the tests for all non-64-bit - ;; systems. - #:tests? ,(string=? (or (%current-system) (%current-target-system)) - "x86_64-linux") - #:modules ((ice-9 ftw) + `(#:modules ((ice-9 ftw) (ice-9 regex) (guix build gnu-build-system) (guix build utils)) - #:make-flags (list "LIBCURSES=-lncurses" - (string-append "prefix=" (assoc-ref %outputs "out"))) + #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out"))) + #:configure-flags (list "--with-ncurses") #:phases (alist-cons-after - 'unpack - 'patch-tests - (lambda* (#:key inputs #:allow-other-keys) - (let ((bash (assoc-ref inputs "bash"))) - (substitute* "test/test.pl" - ;; The test script calls out to /bin/bash - (("/bin/bash") - (string-append bash "/bin/bash")) - ;; There are two failing tests upstream relating to the "stats" - ;; subcommand in test_usage_subcommand ("did not have Usage" - ;; and "usage did not mention samtools stats"), so we disable - ;; them. - (("(test_usage_subcommand\\(.*\\);)" cmd) - (string-append "unless ($subcommand eq 'stats') {" cmd "};"))))) + 'unpack 'patch-tests + (lambda _ + (substitute* "test/test.pl" + ;; The test script calls out to /bin/bash + (("/bin/bash") (which "bash"))) + #t) (alist-cons-after 'install 'install-library (lambda* (#:key outputs #:allow-other-keys) @@ -2339,7 +2428,7 @@ distribution, coverage uniformity, strand specificity, etc.") (install-file file include)) (scandir "." (lambda (name) (string-match "\\.h$" name)))) #t)) - (alist-delete 'configure %standard-phases)))))) + %standard-phases))))) (native-inputs `(("pkg-config" ,pkg-config))) (inputs `(("ncurses" ,ncurses) ("perl" ,perl) @@ -2489,7 +2578,7 @@ simultaneously.") `(modify-phases ,phases (replace 'enter-dir (lambda _ (chdir "ngs-java") #t))))))) (inputs - `(("jdk" ,icedtea6 "jdk") + `(("jdk" ,icedtea "jdk") ("ngs-sdk" ,ngs-sdk))) (synopsis "Java bindings for NGS SDK"))) @@ -3418,6 +3507,495 @@ GenomicRanges package defines general purpose containers for storing and manipulating genomic intervals and variables defined along a genome.") (license license:artistic2.0))) +(define-public r-biobase + (package + (name "r-biobase") + (version "2.30.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "Biobase" version)) + (sha256 + (base32 + "1qasjpq3kw8h7qw8cin3bjvv1256hqr1mm24fq3v0ymxzlb66szi")))) + (properties + `((upstream-name . "Biobase"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics))) + (home-page "http://bioconductor.org/packages/Biobase") + (synopsis "Base functions for Bioconductor") + (description + "This package provides functions that are needed by many other packages +on Bioconductor or which replace R functions.") + (license license:artistic2.0))) + +(define-public r-annotationdbi + (package + (name "r-annotationdbi") + (version "1.32.2") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "AnnotationDbi" version)) + (sha256 + (base32 + "08ncdjvq0l44kqyiv32kn9wnbw1xgfb6qjfzfbjpqrcfp1jygz9j")))) + (properties + `((upstream-name . "AnnotationDbi"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-dbi" ,r-dbi) + ("r-iranges" ,r-iranges) + ("r-rsqlite" ,r-rsqlite) + ("r-s4vectors" ,r-s4vectors))) + (home-page "http://bioconductor.org/packages/AnnotationDbi") + (synopsis "Annotation database interface") + (description + "This package provides user interface and database connection code for +annotation data packages using SQLite data storage.") + (license license:artistic2.0))) + +(define-public r-biomart + (package + (name "r-biomart") + (version "2.26.1") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "biomaRt" version)) + (sha256 + (base32 + "1s709055abj2gd35g6nnk5d2ai5ii09iir270l2xika6pi62gj3f")))) + (properties + `((upstream-name . "biomaRt"))) + (build-system r-build-system) + (propagated-inputs + `(("r-annotationdbi" ,r-annotationdbi) + ("r-rcurl" ,r-rcurl) + ("r-xml" ,r-xml))) + (home-page "http://bioconductor.org/packages/biomaRt") + (synopsis "Interface to BioMart databases") + (description + "biomaRt provides an interface to a growing collection of databases +implementing the @url{BioMart software suite, http://www.biomart.org}. The +package enables retrieval of large amounts of data in a uniform way without +the need to know the underlying database schemas or write complex SQL queries. +Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene, +Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt +users direct access to a diverse set of data and enable a wide range of +powerful online queries from gene annotation to database mining.") + (license license:artistic2.0))) + +(define-public r-biocparallel + (package + (name "r-biocparallel") + (version "1.4.3") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "BiocParallel" version)) + (sha256 + (base32 + "1f5mndx66vampcsq0n66afg6x851crl0h3nyv2nyp9bsgzj9cdzq")))) + (properties + `((upstream-name . "BiocParallel"))) + (build-system r-build-system) + (propagated-inputs + `(("r-futile-logger" ,r-futile-logger) + ("r-snow" ,r-snow))) + (home-page "http://bioconductor.org/packages/BiocParallel") + (synopsis "Bioconductor facilities for parallel evaluation") + (description + "This package provides modified versions and novel implementation of +functions for parallel evaluation, tailored to use with Bioconductor +objects.") + (license (list license:gpl2+ license:gpl3+)))) + +(define-public r-biostrings + (package + (name "r-biostrings") + (version "2.38.2") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "Biostrings" version)) + (sha256 + (base32 + "1afp9szc8ci6jn0m3hrrqh6df65cpw3v1dcnl6xir3d3m3lwwmk4")))) + (properties + `((upstream-name . "Biostrings"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics) + ("r-iranges" ,r-iranges) + ("r-s4vectors" ,r-s4vectors) + ("r-xvector" ,r-xvector))) + (home-page "http://bioconductor.org/packages/Biostrings") + (synopsis "String objects and algorithms for biological sequences") + (description + "This package provides memory efficient string containers, string +matching algorithms, and other utilities, for fast manipulation of large +biological sequences or sets of sequences.") + (license license:artistic2.0))) + +(define-public r-rsamtools + (package + (name "r-rsamtools") + (version "1.22.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "Rsamtools" version)) + (sha256 + (base32 + "1yc3nzzms3igjwr4l9yd3wdac95glcs08b4cfp7disyly0wcskjd")))) + (properties + `((upstream-name . "Rsamtools"))) + (build-system r-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'use-system-zlib + (lambda _ + (substitute* "DESCRIPTION" + (("zlibbioc, ") "")) + (substitute* "NAMESPACE" + (("import\\(zlibbioc\\)") "")) + #t))))) + (inputs + `(("zlib" ,zlib))) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics) + ("r-biocparallel" ,r-biocparallel) + ("r-biostrings" ,r-biostrings) + ("r-bitops" ,r-bitops) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-s4vectors" ,r-s4vectors) + ("r-xvector" ,r-xvector))) + (home-page "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html") + (synopsis "Interface to samtools, bcftools, and tabix") + (description + "This package provides an interface to the 'samtools', 'bcftools', and +'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA, +binary variant call (BCF) and compressed indexed tab-delimited (tabix) +files.") + (license license:expat))) + +(define-public r-summarizedexperiment + (package + (name "r-summarizedexperiment") + (version "1.0.1") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "SummarizedExperiment" version)) + (sha256 + (base32 + "0w1dwp99p6i7sc3cn0ir3dr8ksgxwjf16675h5i8n6gbv4rl9lz6")))) + (properties + `((upstream-name . "SummarizedExperiment"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-s4vectors" ,r-s4vectors))) + (home-page "http://bioconductor.org/packages/SummarizedExperiment") + (synopsis "Container for representing genomic ranges by sample") + (description + "The SummarizedExperiment container contains one or more assays, each +represented by a matrix-like object of numeric or other mode. The rows +typically represent genomic ranges of interest and the columns represent +samples.") + (license license:artistic2.0))) + +(define-public r-genomicalignments + (package + (name "r-genomicalignments") + (version "1.6.1") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "GenomicAlignments" version)) + (sha256 + (base32 + "03pxzkmwcpl0d7a09ahan0nllfv7qw2i7w361w6af2s4n3xwrniz")))) + (properties + `((upstream-name . "GenomicAlignments"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics) + ("r-biocparallel" ,r-biocparallel) + ("r-biostrings" ,r-biostrings) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-rsamtools" ,r-rsamtools) + ("r-s4vectors" ,r-s4vectors) + ("r-summarizedexperiment" ,r-summarizedexperiment))) + (home-page "http://bioconductor.org/packages/GenomicAlignments") + (synopsis "Representation and manipulation of short genomic alignments") + (description + "This package provides efficient containers for storing and manipulating +short genomic alignments (typically obtained by aligning short reads to a +reference genome). This includes read counting, computing the coverage, +junction detection, and working with the nucleotide content of the +alignments.") + (license license:artistic2.0))) + +(define-public r-rtracklayer + (package + (name "r-rtracklayer") + (version "1.30.1") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "rtracklayer" version)) + (sha256 + (base32 + "1if31hg56islx5vwydpgs5gkyas26kyvv2ljv1c7jikpm62w14qv")))) + (build-system r-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'use-system-zlib + (lambda _ + (substitute* "DESCRIPTION" + (("zlibbioc, ") "")) + (substitute* "NAMESPACE" + (("import\\(zlibbioc\\)") "")) + #t))))) + (inputs + `(("zlib" ,zlib))) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics) + ("r-biostrings" ,r-biostrings) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicalignments" ,r-genomicalignments) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-rcurl" ,r-rcurl) + ("r-rsamtools" ,r-rsamtools) + ("r-s4vectors" ,r-s4vectors) + ("r-xml" ,r-xml) + ("r-xvector" ,r-xvector))) + (home-page "http://bioconductor.org/packages/rtracklayer") + (synopsis "R interface to genome browsers and their annotation tracks") + (description + "rtracklayer is an extensible framework for interacting with multiple +genome browsers (currently UCSC built-in) and manipulating annotation tracks +in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit +built-in). The user may export/import tracks to/from the supported browsers, +as well as query and modify the browser state, such as the current viewport.") + (license license:artistic2.0))) + +(define-public r-genomicfeatures + (package + (name "r-genomicfeatures") + (version "1.22.7") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "GenomicFeatures" version)) + (sha256 + (base32 + "1jb4s49ar5j9qslpd3kfdg2wrl4q7ciysd55h9a7zvspymxcngq8")))) + (properties + `((upstream-name . "GenomicFeatures"))) + (build-system r-build-system) + (propagated-inputs + `(("r-annotationdbi" ,r-annotationdbi) + ("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-biomart" ,r-biomart) + ("r-biostrings" ,r-biostrings) + ("r-dbi" ,r-dbi) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-rcurl" ,r-rcurl) + ("r-rsqlite" ,r-rsqlite) + ("r-rtracklayer" ,r-rtracklayer) + ("r-s4vectors" ,r-s4vectors) + ("r-xvector" ,r-xvector))) + (home-page "http://bioconductor.org/packages/GenomicFeatures") + (synopsis "Tools for working with transcript centric annotations") + (description + "This package provides a set of tools and methods for making and +manipulating transcript centric annotations. With these tools the user can +easily download the genomic locations of the transcripts, exons and cds of a +given organism, from either the UCSC Genome Browser or a BioMart +database (more sources will be supported in the future). This information is +then stored in a local database that keeps track of the relationship between +transcripts, exons, cds and genes. Flexible methods are provided for +extracting the desired features in a convenient format.") + (license license:artistic2.0))) + +(define-public r-go-db + (package + (name "r-go-db") + (version "3.2.2") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "GO.db" version)) + (sha256 + (base32 + "00gariag9ampz82dh0xllrc26r85d7vdcwc0vca5zdy147rwxr7f")))) + (properties + `((upstream-name . "GO.db"))) + (build-system r-build-system) + (home-page "http://bioconductor.org/packages/GO.db") + (synopsis "Annotation maps describing the entire Gene Ontology") + (description + "The purpose of this GO.db annotation package is to provide detailed +information about the latest version of the Gene Ontologies.") + (license license:artistic2.0))) + +(define-public r-topgo + (package + (name "r-topgo") + (version "2.22.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "topGO" version)) + (sha256 + (base32 + "029j9nb39b8l9xlzsp83pmjr8ap247aia387yzaa1yyw8klapdaf")))) + (properties + `((upstream-name . "topGO"))) + (build-system r-build-system) + (propagated-inputs + `(("r-annotationdbi" ,r-annotationdbi) + ("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-go-db" ,r-go-db) + ("r-sparsem" ,r-sparsem))) + (home-page "http://bioconductor.org/packages/topGO") + (synopsis "Enrichment analysis for gene ontology") + (description + "The topGO package provides tools for testing @dfn{gene ontology} (GO) +terms while accounting for the topology of the GO graph. Different test +statistics and different methods for eliminating local similarities and +dependencies between GO terms can be implemented and applied.") + ;; Any version of the LGPL applies. + (license license:lgpl2.1+))) + +(define-public r-bsgenome + (package + (name "r-bsgenome") + (version "1.38.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "BSgenome" version)) + (sha256 + (base32 + "130w0m6q8kkca7gyz1aqj5jjhalwvwi6rk2yvbjrnj4gpnncyrd2")))) + (properties + `((upstream-name . "BSgenome"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics) + ("r-biostrings" ,r-biostrings) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-rsamtools" ,r-rsamtools) + ("r-rtracklayer" ,r-rtracklayer) + ("r-s4vectors" ,r-s4vectors) + ("r-xvector" ,r-xvector))) + (home-page "http://bioconductor.org/packages/BSgenome") + (synopsis "Infrastructure for Biostrings-based genome data packages") + (description + "This package provides infrastructure shared by all Biostrings-based +genome data packages and support for efficient SNP representation.") + (license license:artistic2.0))) + +(define-public r-impute + (package + (name "r-impute") + (version "1.44.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "impute" version)) + (sha256 + (base32 + "0y4x5jk7gsf4xn56jrkdcdnxpcfll4h6ivncd7n4snmzixldvmvw")))) + (inputs + `(("gfortran" ,gfortran))) + (build-system r-build-system) + (home-page "http://bioconductor.org/packages/impute") + (synopsis "Imputation for microarray data") + (description + "This package provides a function to impute missing gene expression +microarray data, using nearest neighbor averaging.") + (license license:gpl2+))) + +(define-public r-seqpattern + (package + (name "r-seqpattern") + (version "1.2.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "seqPattern" version)) + (sha256 + (base32 + "0p9zj6bic7sa0hb2bjm988kkk5n9r1kvlbqkzvy702f642n0j53i")))) + (properties + `((upstream-name . "seqPattern"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biostrings" ,r-biostrings) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-plotrix" ,r-plotrix))) + (home-page "http://bioconductor.org/packages/seqPattern") + (synopsis "Visualising oligonucleotide patterns and motif occurrences") + (description + "This package provides tools to visualize oligonucleotide patterns and +sequence motif occurrences across a large set of sequences centred at a common +reference point and sorted by a user defined feature.") + (license license:gpl3+))) + +(define-public r-genomation + (package + (name "r-genomation") + (version "1.2.1") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "genomation" version)) + (sha256 + (base32 + "1mzs995snwim13qk9kz4q3nczpnbsy1allwp4whfq0cflg2mndfr")))) + (build-system r-build-system) + (propagated-inputs + `(("r-biostrings" ,r-biostrings) + ("r-bsgenome" ,r-bsgenome) + ("r-data-table" ,r-data-table) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicalignments" ,r-genomicalignments) + ("r-genomicranges" ,r-genomicranges) + ("r-ggplot2" ,r-ggplot2) + ("r-gridbase" ,r-gridbase) + ("r-impute" ,r-impute) + ("r-iranges" ,r-iranges) + ("r-matrixstats" ,r-matrixstats) + ("r-plotrix" ,r-plotrix) + ("r-plyr" ,r-plyr) + ("r-readr" ,r-readr) + ("r-reshape2" ,r-reshape2) + ("r-rsamtools" ,r-rsamtools) + ("r-rtracklayer" ,r-rtracklayer) + ("r-seqpattern" ,r-seqpattern))) + (home-page "http://bioinformatics.mdc-berlin.de/genomation/") + (synopsis "Summary, annotation and visualization of genomic data") + (description + "This package provides a package for summary and annotation of genomic +intervals. Users can visualize and quantify genomic intervals over +pre-defined functional regions, such as promoters, exons, introns, etc. The +genomic intervals represent regions with a defined chromosome position, which +may be associated with a score, such as aligned reads from HT-seq experiments, +TF binding sites, methylation scores, etc. The package can use any tabular +genomic feature data as long as it has minimal information on the locations of +genomic intervals. In addition, it can use BAM or BigWig files as input.") + (license license:artistic2.0))) + (define-public r-qtl (package (name "r-qtl") |