aboutsummaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
author: Ludovic Courtès <ludo@gnu.org> 2016-07-20 11:42:02 +0200
committer: Ludovic Courtès <ludo@gnu.org> 2016-07-20 11:42:17 +0200
commit: 7575655212ecfbcd1f04e429c8a7a41f8720d027 (patch)
tree: 558982d3cf50ef6b19ef293850de1f485fde66a6 /gnu/packages/bioinformatics.scm
parent: 5d4c90ae02f1e0b42d575bba2d828d63aaf79be5 (diff)
parent: 5f01078129f4eaa4760a14f22761cf357afb6738 (diff)
download: patches-7575655212ecfbcd1f04e429c8a7a41f8720d027.tar
patches-7575655212ecfbcd1f04e429c8a7a41f8720d027.tar.gz
Merge branch 'master' into core-updates
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- gnu/packages/bioinformatics.scm 636
1 files changed, 595 insertions, 41 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index f5e7285193..1cbf85ff6c 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -27,6 +27,7 @@
#:use-module (guix utils)
#:use-module (guix download)
#:use-module (guix git-download)
+ #:use-module (guix hg-download)
#:use-module (guix build-system ant)
#:use-module (guix build-system gnu)
#:use-module (guix build-system cmake)
@@ -49,18 +50,23 @@
#:use-module (gnu packages file)
#:use-module (gnu packages gawk)
#:use-module (gnu packages gcc)
+ #:use-module (gnu packages gd)
+ #:use-module (gnu packages image)
#:use-module (gnu packages java)
#:use-module (gnu packages linux)
+ #:use-module (gnu packages logging)
#:use-module (gnu packages machine-learning)
#:use-module (gnu packages maths)
#:use-module (gnu packages mpi)
#:use-module (gnu packages ncurses)
#:use-module (gnu packages pcre)
+ #:use-module (gnu packages pdf)
#:use-module (gnu packages perl)
#:use-module (gnu packages pkg-config)
#:use-module (gnu packages popt)
#:use-module (gnu packages protobuf)
#:use-module (gnu packages python)
+ #:use-module (gnu packages readline)
#:use-module (gnu packages ruby)
#:use-module (gnu packages statistics)
#:use-module (gnu packages tbb)
@@ -70,6 +76,7 @@
#:use-module (gnu packages vim)
#:use-module (gnu packages web)
#:use-module (gnu packages xml)
+ #:use-module (gnu packages xorg)
#:use-module (gnu packages zip)
#:use-module (srfi srfi-1))
@@ -215,7 +222,7 @@ computational cluster.")
(define-public bedtools
(package
(name "bedtools")
- (version "2.25.0")
+ (version "2.26.0")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
@@ -223,7 +230,7 @@ computational cluster.")
(file-name (string-append name "-" version ".tar.gz"))
(sha256
(base32
- "1ywcy3yfwzhl905b51l0ffjia55h75vv3mw5xkvib04pp6pj548m"))))
+ "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
(build-system gnu-build-system)
(native-inputs `(("python" ,python-2)))
(inputs `(("samtools" ,samtools)
@@ -251,6 +258,51 @@ intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
(license license:gpl2)))
+;; Later releases of bedtools produce files with more columns than
+;; what Ribotaper expects.
+(define-public bedtools-2.18 ; bedtools pinned at 2.18.0; used as the "bedtools" input of ribotaper
+ (package (inherit bedtools) ; fields not listed below are inherited from bedtools
+ (name "bedtools")
+ (version "2.18.0")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "https://github.com/arq5x/bedtools2/"
+ "archive/v" version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; i.e. bedtools-2.18.0.tar.gz
+ (sha256
+ (base32
+ "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
+
+(define-public ribotaper ; package definition for the RiboTaper pipeline
+ (package
+ (name "ribotaper")
+ (version "1.3.1")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "https://ohlerlab.mdc-berlin.de/"
+ "files/RiboTaper/RiboTaper_Version_"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
+ (build-system gnu-build-system)
+ (inputs
+ `(("bedtools" ,bedtools-2.18) ; pinned: later bedtools emit more columns than Ribotaper expects
+ ("samtools" ,samtools-0.1) ; NOTE(review): presumably pinned for a similar compatibility reason - confirm
+ ("r" ,r)
+ ("r-foreach" ,r-foreach)
+ ("r-xnomial" ,r-xnomial)
+ ("r-domc" ,r-domc)
+ ("r-multitaper" ,r-multitaper)
+ ("r-seqinr" ,r-seqinr)))
+ (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
+ (synopsis "Define translated ORFs using ribosome profiling data")
+ (description
+ "Ribotaper is a method for defining translated @dfn{open reading
+frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
+provides the Ribotaper pipeline.")
+ (license license:gpl3+)))
+
(define-public bioawk
(package
(name "bioawk")
@@ -421,7 +473,7 @@ into separate processes; and more.")
(define-public blast+
(package
(name "blast+")
- (version "2.2.31")
+ (version "2.4.0")
(source (origin
(method url-fetch)
(uri (string-append
@@ -429,7 +481,7 @@ into separate processes; and more.")
version "/ncbi-blast-" version "+-src.tar.gz"))
(sha256
(base32
- "19gq6as4k1jrgsd26158ads6h7v4jca3h4r5dzg1y0m6ya50x5ph"))
+ "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -1439,6 +1491,40 @@ accessing bigWig files.")
(native-inputs
`(("python-setuptools" ,python2-setuptools))))))
+(define-public python-dendropy ; DendroPy from PyPI, built with python-build-system
+ (package
+ (name "python-dendropy")
+ (version "4.1.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "DendroPy" version)) ; PyPI project name is capitalised, unlike the package name
+ (sha256
+ (base32
+ "1jfz7gp18wph311w1yygbvjanb3n5mdqal439bb6myw41dwb5m63"))
+ ;; There are two known test failures that will be fixed in the next
+ ;; release after 4.1.0.
+ ;; https://github.com/jeetsukumaran/DendroPy/issues/48
+ (patches (search-patches
+ "python-dendropy-exclude-failing-tests.patch"))))
+ (build-system python-build-system)
+ (home-page "http://packages.python.org/DendroPy/")
+ (synopsis "Library for phylogenetics and phylogenetic computing")
+ (description
+ "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
+writing, simulation, processing and manipulation of phylogenetic
+trees (phylogenies) and characters.")
+ (license license:bsd-3)
+ (properties `((python2-variant . ,(delay python2-dendropy)))))) ; delayed: python2-dendropy is defined after this package
+
+(define-public python2-dendropy ; Python 2 variant of python-dendropy
+ (let ((base (package-with-python2 (strip-python2-variant python-dendropy)))) ; strip the property first, then convert
+ (package
+ (inherit base)
+ (native-inputs `(("python2-setuptools" ,python2-setuptools) ; added in front of the inherited native inputs
+ ,@(package-native-inputs base))))))
+
+
(define-public deeptools
(package
(name "deeptools")
@@ -1482,7 +1568,7 @@ identify enrichments with functional annotations of the genome.")
(define-public diamond
(package
(name "diamond")
- (version "0.8.7")
+ (version "0.8.15")
(source (origin
(method url-fetch)
(uri (string-append
@@ -1491,7 +1577,7 @@ identify enrichments with functional annotations of the genome.")
(file-name (string-append name "-" version ".tar.gz"))
(sha256
(base32
- "15r7gcrqc4pv5d4kvv530zc3xnni92c74y63zrxzidriss7591yx"))))
+ "14n0p28b4i5j8vvz1fl4xj1gxnpg98bj0kr3i90mhn7miwr4pkpw"))))
(build-system cmake-build-system)
(arguments
'(#:tests? #f ; no "check" target
@@ -1517,6 +1603,75 @@ data and settings.")
(license (license:non-copyleft "file://src/COPYING"
"See src/COPYING in the distribution."))))
+(define-public eigensoft ; EIGENSOFT built from a pinned commit of the DReichLab/EIG repository
+ (let ((revision "1")
+ (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
+ (package
+ (name "eigensoft")
+ (version (string-append "6.1.2-"
+ revision "."
+ (string-take commit 9))) ; first nine characters of the commit ID
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/DReichLab/EIG.git")
+ (commit commit)))
+ (file-name (string-append "eigensoft-" commit "-checkout"))
+ (sha256
+ (base32
+ "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
+ (modules '((guix build utils)))
+ ;; Remove pre-built binaries.
+ (snippet '(begin
+ (delete-file-recursively "bin")
+ (mkdir "bin") ; recreate "bin" as an empty directory
+ #t))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:tests? #f ; There are no tests.
+ #:make-flags '("CC=gcc")
+ #:phases
+ (modify-phases %standard-phases
+ ;; There is no configure phase, but the Makefile is in a
+ ;; sub-directory.
+ (replace 'configure
+ (lambda _
+ (chdir "src") ; the Makefile patched below lives here
+ ;; The link flags are incomplete.
+ (substitute* "Makefile"
+ (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
+ #t))
+ ;; The provided install target only copies executables to
+ ;; the "bin" directory in the build root.
+ (add-after 'install 'actually-install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
+ (bin (string-append out "/bin")))
+ (mkdir-p bin)
+ (for-each (lambda (file)
+ (install-file file bin))
+ (find-files "../bin" ".*")) ; "../bin" is relative to "src", entered in 'configure
+ #t))))))
+ (inputs
+ `(("gsl" ,gsl)
+ ("lapack" ,lapack)
+ ("openblas" ,openblas)
+ ("perl" ,perl)
+ ("gfortran" ,gfortran "lib"))) ; only the "lib" output of gfortran
+ (home-page "https://github.com/DReichLab/EIG")
+ (synopsis "Tools for population genetics")
+ (description "The EIGENSOFT package provides tools for population
+genetics and stratification correction. EIGENSOFT implements methods commonly
+used in population genetics analyses such as PCA, computation of Tracy-Widom
+statistics, and finding related individuals in structured populations. It
+comes with a built-in plotting script and supports multiple file formats and
+quantitative phenotypes.")
+ ;; The license of the eigensoft tools is Expat, but since it's
+ ;; linking with the GNU Scientific Library (GSL) the effective
+ ;; license is the GPL.
+ (license license:gpl3+))))
+
(define-public edirect
(package
(name "edirect")
@@ -2170,7 +2325,7 @@ manipulating HTS data.")
(define-public htslib
(package
(name "htslib")
- (version "1.2.1")
+ (version "1.3.1")
(source (origin
(method url-fetch)
(uri (string-append
@@ -2178,7 +2333,7 @@ manipulating HTS data.")
version "/htslib-" version ".tar.bz2"))
(sha256
(base32
- "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
+ "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))
(build-system gnu-build-system)
(arguments
`(#:phases
@@ -2301,6 +2456,87 @@ command, or queried for specific k-mers with @code{jellyfish query}.")
;; files such as lib/jsoncpp.cpp are released under the Expat license.
(license (list license:gpl3+ license:expat))))
+(define-public khmer ; khmer from PyPI; bundled zlib, bzip2 and seqan are replaced by packaged ones
+ (package
+ (name "khmer")
+ (version "2.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "khmer" version))
+ (sha256
+ (base32
+ "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
+ (patches (search-patches "khmer-use-libraries.patch"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'set-paths
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ ;; Delete bundled libraries.
+ (delete-file-recursively "third-party/zlib")
+ (delete-file-recursively "third-party/bzip2")
+ ;; Replace bundled seqan.
+ (let* ((seqan-all "third-party/seqan")
+ (seqan-include (string-append
+ seqan-all "/core/include")))
+ (delete-file-recursively seqan-all)
+ (copy-recursively (string-append (assoc-ref inputs "seqan")
+ "/include/seqan")
+ (string-append seqan-include "/seqan")))
+ ;; We do not replace the bundled MurmurHash as the canonical
+ ;; repository for this code 'SMHasher' is unsuitable for
+ ;; providing a library. See
+ ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
+ #t))
+ (add-after 'unpack 'set-cc
+ (lambda _
+ (setenv "CC" "gcc")
+ #t))
+ ;; It is simpler to test after installation.
+ (delete 'check)
+ (add-after 'install 'post-install-check
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let ((out (assoc-ref outputs "out")))
+ (setenv "PATH" ; append the installed "bin" directory
+ (string-append
+ (getenv "PATH")
+ ":"
+ out
+ "/bin"))
+ (setenv "PYTHONPATH" ; append the installed site-packages directory
+ (string-append
+ (getenv "PYTHONPATH")
+ ":"
+ out
+ "/lib/python"
+ (string-take (string-take-right ; first 3 of the last 5 characters of the python store path
+ (assoc-ref inputs "python") 5) 3) ; NOTE(review): assumes the path ends in an X.Y.Z version
+ "/site-packages"))
+ (with-directory-excursion "build"
+ (zero? (system* "nosetests" "khmer" "--attr"
+ "!known_failing")))))))))
+ (native-inputs
+ `(("seqan" ,seqan)
+ ("python-nose" ,python-nose)))
+ (inputs
+ `(("zlib" ,zlib)
+ ("bzip2" ,bzip2)
+ ("python-screed" ,python-screed)
+ ("python-bz2file" ,python-bz2file)))
+ (home-page "https://khmer.readthedocs.org/")
+ (synopsis "K-mer counting, filtering and graph traversal library")
+ (description "The khmer software is a set of command-line tools for
+working with DNA shotgun sequencing data from genomes, transcriptomes,
+metagenomes and single cells. Khmer can make de novo assemblies faster, and
+sometimes better. Khmer can also identify and fix problems with shotgun
+data.")
+ ;; When building on i686, armhf and mips64el, we get the following error:
+ ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
+ (supported-systems '("x86_64-linux"))
+ (license license:bsd-3)))
+
(define-public macs
(package
(name "macs")
@@ -2333,7 +2569,7 @@ sequencing tag position and orientation.")
(define-public mafft
(package
(name "mafft")
- (version "7.267")
+ (version "7.299")
(source (origin
(method url-fetch)
(uri (string-append
@@ -2342,7 +2578,7 @@ sequencing tag position and orientation.")
(file-name (string-append name "-" version ".tgz"))
(sha256
(base32
- "1xl6xq1rfxkws0svrlhyqxhhwbv6r77jwblsdpcyiwzsscw6wlk0"))))
+ "1pwwdy5a17ggx8h9v9y712ilswj27dc3d23r65l56jgjz67y5zc0"))))
(build-system gnu-build-system)
(arguments
`(#:tests? #f ; no automated tests, though there are tests in the read me
@@ -2408,7 +2644,7 @@ sequences).")
(define-public metabat
(package
(name "metabat")
- (version "0.26.1")
+ (version "0.26.3")
(source (origin
(method url-fetch)
(uri (string-append
@@ -2417,31 +2653,40 @@ sequences).")
(file-name (string-append name "-" version ".tar.bz2"))
(sha256
(base32
- "0vgrhbaxg4dkxyax2kbigak7w0arhqvw0szwp6gd9wmyilc44kfa"))))
+ "1vpfvgsn8wdsv1g7z73zxcncskx7dy7bw5msg1hhibk25ay11pyg"))))
(build-system gnu-build-system)
(arguments
`(#:phases
(modify-phases %standard-phases
(add-after 'unpack 'fix-includes
- (lambda _
- (substitute* "SConstruct"
- (("/include/bam/bam.h")
- "/include/samtools/bam.h"))
- (substitute* "src/BamUtils.h"
- (("^#include \"bam/bam\\.h\"")
- "#include \"samtools/bam.h\"")
- (("^#include \"bam/sam\\.h\"")
- "#include \"samtools/sam.h\""))
- (substitute* "src/KseqReader.h"
- (("^#include \"bam/kseq\\.h\"")
- "#include \"samtools/kseq.h\""))
- #t))
+ (lambda _
+ (substitute* "src/BamUtils.h"
+ (("^#include \"bam/bam\\.h\"")
+ "#include \"samtools/bam.h\"")
+ (("^#include \"bam/sam\\.h\"")
+ "#include \"samtools/sam.h\""))
+ (substitute* "src/KseqReader.h"
+ (("^#include \"bam/kseq\\.h\"")
+ "#include \"htslib/kseq.h\""))
+ #t))
(add-after 'unpack 'fix-scons
- (lambda _
- (substitute* "SConstruct" ; Do not distribute README
- (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)")
- ""))
- #t))
+ (lambda* (#:key inputs #:allow-other-keys) ; rewrite SConstruct to use the htslib and samtools inputs
+ (substitute* "SConstruct"
+ (("^htslib_dir = 'samtools'")
+ (string-append "htslib_dir = '" ; keep the variable name that the pattern matched
+ (assoc-ref inputs "htslib")
+ "'"))
+ (("^samtools_dir = 'samtools'")
+ (string-append "samtools_dir = '"
+ (assoc-ref inputs "samtools") ; samtools_dir refers to the samtools input, not htslib
+ "'"))
+ (("^findStaticOrShared\\('bam', hts_lib")
+ (string-append "findStaticOrShared('bam', '" ; look for the 'bam' library under samtools' lib directory
+ (assoc-ref inputs "samtools")
+ "/lib'"))
+ ;; Do not distribute README.
+ (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
+ #t))
(delete 'configure)
(replace 'build
(lambda* (#:key inputs outputs #:allow-other-keys)
@@ -2451,16 +2696,10 @@ sequences).")
"PREFIX="
(assoc-ref outputs "out"))
(string-append
- "HTSLIB_DIR="
- (assoc-ref inputs "htslib"))
- (string-append
- "SAMTOOLS_DIR="
- (assoc-ref inputs "samtools"))
- (string-append
"BOOST_ROOT="
(assoc-ref inputs "boost"))
"install"))))
- ;; check and install carried out during build phase
+ ;; Check and install are carried out during build phase.
(delete 'check)
(delete 'install))))
(inputs
@@ -2919,6 +3158,75 @@ while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
(license license:gpl3+)))
+(define-public seek ; SEEK, built from a pinned Mercurial changeset of the sleipnir repository
+ ;; There are no release tarballs. According to the installation
+ ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
+ ;; stable release is identified by this changeset ID.
+ (let ((changeset "2329130")
+ (revision "1"))
+ (package
+ (name "seek")
+ (version (string-append "0-" revision "." changeset))
+ (source (origin
+ (method hg-fetch)
+ (uri (hg-reference
+ (url "https://bitbucket.org/libsleipnir/sleipnir")
+ (changeset changeset)))
+ (sha256
+ (base32
+ "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:modules ((srfi srfi-1) ; provides 'every', used by both extra phases
+ (guix build gnu-build-system)
+ (guix build utils))
+ #:phases
+ (let ((dirs '("SeekMiner" ; sub-directories of "tools/" built and installed separately
+ "SeekEvaluator"
+ "SeekPrep"
+ "Distancer"
+ "Data2DB"
+ "PCL2Bin")))
+ (modify-phases %standard-phases
+ (add-before 'configure 'bootstrap
+ (lambda _
+ (zero? (system* "bash" "gen_auto"))))
+ (add-after 'build 'build-additional-tools
+ (lambda* (#:key make-flags #:allow-other-keys)
+ (every (lambda (dir)
+ (with-directory-excursion (string-append "tools/" dir)
+ (zero? (apply system* "make" make-flags))))
+ dirs)))
+ (add-after 'install 'install-additional-tools
+ (lambda* (#:key make-flags #:allow-other-keys)
+ ;; As in the build phase, stop at the first tool that fails.
+ (every (lambda (dir)
+ (with-directory-excursion (string-append "tools/" dir)
+ (zero? (apply system*
+ `("make" ,@make-flags "install")))))
+ dirs)))))))
+ (inputs
+ `(("gsl" ,gsl)
+ ("boost" ,boost)
+ ("libsvm" ,libsvm)
+ ("readline" ,readline)
+ ("gengetopt" ,gengetopt)
+ ("log4cpp" ,log4cpp)))
+ (native-inputs
+ `(("autoconf" ,autoconf)
+ ("automake" ,automake)
+ ("perl" ,perl)))
+ (home-page "http://seek.princeton.edu")
+ (synopsis "Gene co-expression search engine")
+ (description
+ "SEEK is a computational gene co-expression search engine. SEEK provides
+biologists with a way to navigate the massive human expression compendium that
+now contains thousands of expression datasets. SEEK returns a robust ranking
+of co-expressed genes in the biological area of interest defined by the user's
+query genes. It also prioritizes thousands of expression datasets according
+to the user's query of interest.")
+ (license license:cc-by3.0))))
+
(define-public samtools
(package
(name "samtools")
@@ -3359,6 +3667,44 @@ optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
(license license:gpl3+)))
+(define-public python-screed ; screed from PyPI; the check phase is replaced by a nosetests run
+ (package
+ (name "python-screed")
+ (version "0.9")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "screed" version))
+ (sha256
+ (base32
+ "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (replace 'check
+ (lambda _
+ (setenv "PYTHONPATH" ; append the current directory to the module search path
+ (string-append (getenv "PYTHONPATH") ":."))
+ (zero? (system* "nosetests" "--attr" "!known_failing"))))))) ; passes the attribute filter "!known_failing"
+ (native-inputs
+ `(("python-nose" ,python-nose))) ; provides the nosetests command used above
+ (inputs
+ `(("python-bz2file" ,python-bz2file)))
+ (home-page "http://github.com/dib-lab/screed/")
+ (synopsis "Short read sequence database utilities")
+ (description "Screed parses FASTA and FASTQ files and generates databases.
+Values such as sequence name, sequence description, sequence quality and the
+sequence itself can be retrieved from these databases.")
+ (license license:bsd-3)))
+
+(define-public python2-screed ; Python 2 variant of python-screed
+ (let ((base (package-with-python2 (strip-python2-variant python-screed)))) ; NOTE(review): python-screed shows no python2-variant property, so the strip is presumably a no-op - confirm
+ (package
+ (inherit base)
+ (native-inputs `(("python2-setuptools" ,python2-setuptools) ; added in front of the inherited native inputs
+ ,@(package-native-inputs base))))))
+
(define-public sra-tools
(package
(name "sra-tools")
@@ -3600,7 +3946,7 @@ application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
(define-public star
(package
(name "star")
- (version "2.5.1b")
+ (version "2.5.2a")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/alexdobin/STAR/archive/"
@@ -3608,7 +3954,7 @@ application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
(file-name (string-append name "-" version ".tar.gz"))
(sha256
(base32
- "0wzcfhkg10apnh0y73xlarfa79xxwxdizicbdl11wb48awk44iq4"))
+ "0xjlsm4p9flln111hv4xx7xy94c2nl53zvdvbk9winmiradjsdra"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -3665,6 +4011,8 @@ by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
+ ;; Only 64-bit systems are supported according to the README.
+ (supported-systems '("x86_64-linux" "mips64el-linux"))
;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
(license license:gpl3+)))
@@ -3869,7 +4217,7 @@ data types as well.")
(define-public vsearch
(package
(name "vsearch")
- (version "2.0.0")
+ (version "2.0.1")
(source
(origin
(method url-fetch)
@@ -3879,7 +4227,7 @@ data types as well.")
(file-name (string-append name "-" version ".tar.gz"))
(sha256
(base32
- "1sd57abgx077icqrbj36jq9q7pdpzc6dbics2pn1555kisq2jhfh"))
+ "0q7szwbf7r29yxkhb415a8i51vj87kvl5ap7h09w7k9ycb2ywvzw"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -4191,6 +4539,30 @@ In addition, a few low-level concrete subclasses of general interest (e.g.
S4Vectors package itself.")
(license license:artistic2.0)))
+(define-public r-seqinr ; the seqinr R package, fetched from CRAN
+ (package
+ (name "r-seqinr")
+ (version "3.1-3")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (cran-uri "seqinr" version))
+ (sha256
+ (base32
+ "0bbjfwbqg74wsamb3iz01g0ssdpdpg65gh00y9xlnpk4wb990n4n"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-ade4" ,r-ade4))) ; propagated, unlike zlib below
+ (inputs
+ `(("zlib" ,zlib)))
+ (home-page "http://seqinr.r-forge.r-project.org/")
+ (synopsis "Biological sequences retrieval and analysis")
+ (description
+ "This package provides tools for exploratory data analysis and data
+visualization of biological sequence (DNA and protein) data. It also includes
+utilities for sequence data management under the ACNUC system.")
+ (license license:gpl2+)))
+
(define-public r-iranges
(package
(name "r-iranges")
@@ -4246,6 +4618,31 @@ translation between different chromosome sequence naming conventions (e.g.,
names in their natural, rather than lexicographic, order.")
(license license:artistic2.0)))
+(define-public r-edger ; the edgeR package, fetched from Bioconductor
+ (package
+ (name "r-edger")
+ (version "3.14.0")
+ (source (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "edgeR" version))
+ (sha256
+ (base32
+ "14vrygy7rz5ngaap4kgkvr3j18y5l6m742n79h68plk6iqgmsskn"))))
+ (properties `((upstream-name . "edgeR"))) ; upstream spelling differs from the lower-case package name
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-limma" ,r-limma)))
+ (home-page "http://bioinf.wehi.edu.au/edgeR")
+ (synopsis "EdgeR does empirical analysis of digital gene expression data")
+ (description "This package can do differential expression analysis of
+RNA-seq expression profiles with biological replication. It implements a range
+of statistical methodology based on the negative binomial distributions,
+including empirical Bayes estimation, exact tests, generalized linear models
+and quasi-likelihood tests. It can be applied to differential signal analysis of
+other types of genomic data that produce counts, including ChIP-seq, SAGE and
+CAGE.")
+ (license license:gpl2+)))
+
(define-public r-variantannotation
(package
(name "r-variantannotation")
@@ -4278,6 +4675,25 @@ names in their natural, rather than lexicographic, order.")
coding changes and predict coding outcomes.")
(license license:artistic2.0)))
+(define-public r-limma ; the limma package, fetched from Bioconductor; propagated input of r-edger
+ (package
+ (name "r-limma")
+ (version "3.28.14")
+ (source (origin
+ (method url-fetch)
+ (uri (bioconductor-uri "limma" version))
+ (sha256
+ (base32
+ "1jgn66ajafhjlqpfcw2p85h6ah8mgmz66znmsw6pcapia7d34akw"))))
+ (build-system r-build-system) ; no inputs are declared for this package
+ (home-page "http://bioinf.wehi.edu.au/limma")
+ (synopsis "Package for linear models for microarray and RNA-seq data")
+ (description "This package can be used for the analysis of gene expression
+studies, especially the use of linear models for analysing designed experiments
+and the assessment of differential expression. The analysis methods apply to
+different technologies, including microarrays, RNA-seq, and quantitative PCR.")
+ (license license:gpl2+)))
+
(define-public r-xvector
(package
(name "r-xvector")
@@ -5237,6 +5653,115 @@ two-dimensional genome scans.")
libraries for systems that do not have these available via other means.")
(license license:artistic2.0)))
+(define-public emboss ; EMBOSS 6.5.7; the build system is regenerated with autoreconf after patching
+ (package
+ (name "emboss")
+ (version "6.5.7")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
+ (version-major+minor version) ".0/" ; directory is named <major.minor>.0
+ "EMBOSS-" version ".tar.gz"))
+ (sha256
+ (base32
+ "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:configure-flags
+ (list (string-append "--with-hpdf=" ; point configure at the libharu input
+ (assoc-ref %build-inputs "libharu")))
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'fix-checks
+ (lambda _
+ ;; The PNGDRIVER tests check for the presence of libgd, libpng
+ ;; and zlib, but assume that they are all found at the same
+ ;; prefix.
+ (substitute* "configure.in"
+ (("CHECK_PNGDRIVER")
+ "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
+AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
+AM_CONDITIONAL(AMPNG, true)"))
+ #t))
+ (add-after 'unpack 'disable-update-check
+ (lambda _
+ ;; At build time there is no connection to the Internet, so
+ ;; looking for updates will not work.
+ (substitute* "Makefile.am"
+ (("\\$\\(bindir\\)/embossupdate") ""))
+ #t))
+ (add-before 'configure 'autogen ; regenerate after the configure.in and Makefile.am edits above
+ (lambda _ (zero? (system* "autoreconf" "-vif")))))))
+ (inputs
+ `(("perl" ,perl)
+ ("libpng" ,libpng)
+ ("gd" ,gd)
+ ("libx11" ,libx11)
+ ("libharu" ,libharu)
+ ("zlib" ,zlib)))
+ (native-inputs
+ `(("autoconf" ,autoconf)
+ ("automake" ,automake)
+ ("libtool" ,libtool)
+ ("pkg-config" ,pkg-config)))
+ (home-page "http://emboss.sourceforge.net")
+ (synopsis "Molecular biology analysis suite")
+ (description "EMBOSS is the \"European Molecular Biology Open Software
+Suite\". EMBOSS is an analysis package specially developed for the needs of
+the molecular biology (e.g. EMBnet) user community. The software
+automatically copes with data in a variety of formats and even allows
+transparent retrieval of sequence data from the web. It also provides a
+number of libraries for the development of software in the field of molecular
+biology. EMBOSS also integrates a range of currently available packages and
+tools for sequence analysis into a seamless whole.")
+ (license license:gpl2+)))
+
+(define-public bits ; BITS built from a pinned git commit; there is no configure step
+ (let ((revision "1")
+ (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
+ (package
+ (name "bits")
+ ;; The version is 2.13.0 even though no release archives have been
+ ;; published as yet.
+ (version (string-append "2.13.0-" revision "." (string-take commit 9)))
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/arq5x/bits.git")
+ (commit commit)))
+ (file-name (string-append name "-" version "-checkout"))
+ (sha256
+ (base32
+ "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:tests? #f ;no tests included
+ #:phases
+ (modify-phases %standard-phases
+ (delete 'configure)
+ (add-after 'unpack 'remove-cuda
+ (lambda _
+ (substitute* "Makefile"
+ ((".*_cuda") "") ; blank out Makefile text matching ".*_cuda"
+ (("(bits_test_intersections) \\\\" _ match) match)) ; drop the backslash that follows this name
+ #t))
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (copy-recursively ; copy the "bin" directory into the output
+ "bin" (string-append (assoc-ref outputs "out") "/bin"))
+ #t)))))
+ (inputs
+ `(("gsl" ,gsl)
+ ("zlib" ,zlib)))
+ (home-page "https://github.com/arq5x/bits")
+ (synopsis "Implementation of binary interval search algorithm")
+ (description "This package provides an implementation of the
+BITS (Binary Interval Search) algorithm, an approach to interval set
+intersection. It is especially suited for the comparison of diverse genomic
+datasets and the exploration of large datasets of genome
+intervals (e.g. genes, sequence alignments).")
+ (license license:gpl2))))
+
(define-public piranha
;; There is no release tarball for the latest version. The latest commit is
;; older than one year at the time of this writing.
@@ -5372,3 +5897,32 @@ group or two ChIP groups run under different conditions.")
(description "This program compares version strings. It intends to be a
replacement for strverscmp.")
(license license:gpl3+))))
+
+(define-public multiqc ; MultiQC from PyPI, built with python-build-system
+ (package
+ (name "multiqc")
+ (version "0.6")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "multiqc" version))
+ (sha256
+ (base32
+ "0avw11h63ldpxy5pizc3wl1wa01ha7q10wb240nggsjz3jaqvyiy"))))
+ (build-system python-build-system)
+ (propagated-inputs ; all Python library dependencies are propagated
+ `(("python-jinja2" ,python-jinja2)
+ ("python-simplejson" ,python-simplejson)
+ ("python-pyyaml" ,python-pyyaml)
+ ("python-click" ,python-click)
+ ("python-matplotlib" ,python-matplotlib)
+ ("python-numpy" ,python-numpy)))
+ (native-inputs
+ `(("python-setuptools" ,python-setuptools)))
+ (home-page "http://multiqc.info")
+ (synopsis "Aggregate bioinformatics analysis reports")
+ (description
+ "MultiQC is a tool to aggregate bioinformatics results across many
+samples into a single report. It contains modules for a large number of
+common bioinformatics tools.")
+ (license license:gpl3))) ; NOTE(review): GPLv3-only as written; confirm upstream does not say "or later"