summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm230
1 files changed, 211 insertions, 19 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 314d0ad322..a9df676fc6 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -654,6 +654,73 @@ gapped, local, and paired-end alignment modes.")
(supported-systems '("x86_64-linux"))
(license license:gpl3+)))
+;; TopHat is built against the packaged SeqAn and samtools: the copies
+;; bundled in the tarball are deleted by the source snippet below.
+(define-public tophat
+ (package
+ (name "tophat")
+ (version "2.1.0")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
+ (patches (list (search-patch "tophat-build-with-later-seqan.patch")))
+ (modules '((guix build utils)))
+ (snippet
+ '(begin
+ ;; Remove bundled SeqAn and samtools
+ (delete-file-recursively "src/SeqAn-1.3")
+ (delete-file-recursively "src/samtools-0.1.18")
+ #t))))
+ (build-system gnu-build-system)
+ (arguments
+ '(#:parallel-build? #f ; not supported
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'use-system-samtools
+ (lambda* (#:key inputs #:allow-other-keys)
+ ;; Strip every reference to the bundled samtools library and
+ ;; program from the Makefile template.
+ (substitute* "src/Makefile.in"
+ (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
+ (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
+ (("SAMPROG = samtools_0\\.1\\.18") "")
+ (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
+ (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
+ ;; Replace the bundled program name with the file name that
+ ;; `which' reports for "samtools". The dots are escaped so
+ ;; that only the literal name "samtools_0.1.18" is rewritten.
+ (substitute* '("src/common.cpp"
+ "src/tophat.py")
+ (("samtools_0\\.1\\.18") (which "samtools")))
+ ;; Include the samtools headers through the <samtools/...>
+ ;; prefix instead of the bare "bam.h" and "sam.h" names.
+ (substitute* '("src/common.h"
+ "src/bam2fastx.cpp")
+ (("#include \"bam.h\"") "#include <samtools/bam.h>")
+ (("#include \"sam.h\"") "#include <samtools/sam.h>"))
+ (substitute* '("src/bwt_map.h"
+ "src/map2gtf.h"
+ "src/align_status.h")
+ (("#include <bam.h>") "#include <samtools/bam.h>")
+ (("#include <sam.h>") "#include <samtools/sam.h>"))
+ #t)))))
+ (inputs
+ `(("boost" ,boost)
+ ("bowtie" ,bowtie)
+ ("samtools" ,samtools-0.1)
+ ("ncurses" ,ncurses)
+ ("python" ,python-2)
+ ("perl" ,perl)
+ ("zlib" ,zlib)
+ ("seqan" ,seqan)))
+ (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
+ (synopsis "Spliced read mapper for RNA-Seq data")
+ (description
+ "TopHat is a fast splice junction mapper for nucleotide sequence
+reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
+mammalian-sized genomes using the ultra high-throughput short read
+aligner Bowtie, and then analyzes the mapping results to identify
+splice junctions between exons.")
+ ;; TopHat is released under the Boost Software License, Version 1.0
+ ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
+ (license license:boost1.0)))
+
(define-public bwa
(package
(name "bwa")
@@ -915,6 +982,64 @@ files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
(license license:gpl2+)))
+;; Cufflinks, configured to use the "samtools" input as its BAM library
+;; and the "eigen" input for the Eigen headers.
+(define-public cufflinks
+ (package
+ (name "cufflinks")
+ (version "2.2.1")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "http://cole-trapnell-lab.github.io/"
+ "cufflinks/assets/downloads/cufflinks-"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:make-flags
+ (list
+ ;; The includes for "eigen" are located in a subdirectory.
+ (string-append "EIGEN_CPPFLAGS="
+ "-I" (assoc-ref %build-inputs "eigen")
+ "/include/eigen3/")
+ ;; Cufflinks must be linked with various boost libraries.
+ (string-append "LDFLAGS="
+ (string-join '("-lboost_system"
+ "-lboost_serialization"
+ "-lboost_thread"))))
+ #:phases
+ (modify-phases %standard-phases
+ ;; Rewrite the <bam/...> header names used by the configure
+ ;; machinery and by src/hits.h to the <samtools/...> prefix;
+ ;; <bam/version.hpp> becomes <samtools/version.h>.
+ (add-after 'unpack 'fix-search-for-bam
+ (lambda _
+ (substitute* '("ax_bam.m4"
+ "configure"
+ "src/hits.h")
+ (("<bam/sam\\.h>") "<samtools/sam.h>")
+ (("<bam/bam\\.h>") "<samtools/bam.h>")
+ (("<bam/version\\.hpp>") "<samtools/version.h>"))
+ #t)))
+ ;; Pass the store directory of the "samtools" input as the BAM
+ ;; location given to configure.
+ #:configure-flags
+ (list (string-append "--with-bam="
+ (assoc-ref %build-inputs "samtools")))))
+ (inputs
+ `(("eigen" ,eigen)
+ ("samtools" ,samtools-0.1)
+ ("htslib" ,htslib)
+ ("boost" ,boost)
+ ("python" ,python-2)
+ ("zlib" ,zlib)))
+ (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
+ (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
+ (description
+ "Cufflinks assembles RNA transcripts, estimates their abundances,
+and tests for differential expression and regulation in RNA-Seq
+samples. It accepts aligned RNA-Seq reads and assembles the
+alignments into a parsimonious set of transcripts. Cufflinks then
+estimates the relative abundances of these transcripts based on how
+many reads support each one, taking into account biases in library
+preparation protocols.")
+ (license license:boost1.0)))
+
(define-public cutadapt
(package
(name "cutadapt")
@@ -3016,40 +3141,107 @@ optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
(license license:asl2.0)))
+;; SortMeRNA, split into two outputs: "out" receives the two programs
+;; and "db" receives the FASTA files found under rRNA_databases.
+(define-public sortmerna
+ (package
+ (name "sortmerna")
+ (version "2.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://github.com/biocore/sortmerna/archive/"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz"))
+ (sha256
+ (base32
+ "1670a92x1vvkacnvgr2i5xac3ls6lp4pc3n0bccnmllsnymggcf0"))))
+ (build-system gnu-build-system)
+ (outputs '("out" ;for binaries
+ "db")) ;for sequence databases
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ ;; Install by hand: the programs go to "out", the databases to
+ ;; "db".
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
+ (bin (string-append out "/bin"))
+ (db (assoc-ref outputs "db"))
+ (share
+ (string-append db "/share/sortmerna/rRNA_databases")))
+ (install-file "sortmerna" bin)
+ (install-file "indexdb_rna" bin)
+ ;; Match on the ".fasta" extension only; an unanchored
+ ;; ".*fasta" would also select any file whose name merely
+ ;; contains "fasta".
+ (for-each (lambda (file)
+ (install-file file share))
+ (find-files "rRNA_databases" "\\.fasta$"))
+ #t))))))
+ (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
+ (synopsis "Biological sequence analysis tool for NGS reads")
+ (description
+ "SortMeRNA is a biological sequence analysis tool for filtering, mapping
+and operational taxonomic unit (OTU) picking of next generation
+sequencing (NGS) reads. The core algorithm is based on approximate seeds and
+allows for fast and sensitive analyses of nucleotide sequences. The main
+application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
+ (license license:lgpl3)))
+
(define-public star
(package
(name "star")
- (version "2.4.2a")
+ (version "2.5.1b")
(source (origin
(method url-fetch)
- (uri (string-append
- "https://github.com/alexdobin/STAR/archive/STAR_"
- version ".tar.gz"))
+ (uri (string-append "https://github.com/alexdobin/STAR/archive/"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz"))
(sha256
(base32
- "1c3rnm7r5l0kl3d04gl1g7938xqf1c2l0mla87rlplqg1hcns5mc"))
+ "0wzcfhkg10apnh0y73xlarfa79xxwxdizicbdl11wb48awk44iq4"))
(modules '((guix build utils)))
(snippet
- '(substitute* "source/Makefile"
- (("/bin/rm") "rm")))))
+ '(begin
+ (substitute* "source/Makefile"
+ (("/bin/rm") "rm"))
+ ;; Remove pre-built binaries and bundled htslib sources.
+ (delete-file-recursively "bin/MacOSX_x86_64")
+ (delete-file-recursively "bin/Linux_x86_64")
+ (delete-file-recursively "source/htslib")
+ #t))))
(build-system gnu-build-system)
(arguments
'(#:tests? #f ;no check target
#:make-flags '("STAR")
#:phases
- (alist-cons-after
- 'unpack 'enter-source-dir (lambda _ (chdir "source"))
- (alist-replace
- 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
- (install-file "STAR" bin)))
- (alist-delete
- 'configure %standard-phases)))))
+ (modify-phases %standard-phases
+ (add-after 'unpack 'enter-source-dir
+ (lambda _ (chdir "source") #t))
+ (add-after 'enter-source-dir 'do-not-use-bundled-htslib
+ (lambda _
+ (substitute* "Makefile"
+ (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
+ _ prefix) prefix))
+ (substitute* '("BAMfunctions.cpp"
+ "signalFromBAM.h"
+ "bam_cat.h"
+ "bam_cat.c"
+ "STAR.cpp"
+ "bamRemoveDuplicates.cpp")
+ (("#include \"htslib/([^\"]+\\.h)\"" _ header)
+ (string-append "#include <" header ">")))
+ (substitute* "IncludeDefine.h"
+ (("\"htslib/(htslib/[^\"]+.h)\"" _ header)
+ (string-append "<" header ">")))
+ #t))
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
+ (install-file "STAR" bin))
+ #t))
+ (delete 'configure))))
(native-inputs
`(("vim" ,vim))) ; for xxd
(inputs
- `(("zlib" ,zlib)))
+ `(("htslib" ,htslib)
+ ("zlib" ,zlib)))
(home-page "https://github.com/alexdobin/STAR")
(synopsis "Universal RNA-seq aligner")
(description
@@ -4002,7 +4194,7 @@ genomic intervals. In addition, it can use BAM or BigWig files as input.")
(define-public r-qtl
(package
(name "r-qtl")
- (version "1.37-11")
+ (version "1.38-4")
(source
(origin
(method url-fetch)
@@ -4010,7 +4202,7 @@ genomic intervals. In addition, it can use BAM or BigWig files as input.")
version ".tar.gz"))
(sha256
(base32
- "0h20d36mww7ljp51pfs66xq33yq4b4fwq9nsh02dpmfhlaxgx1xi"))))
+ "0rv9xhp8lyldpgwxqirhyjqvg07dr5x4x1x2jpyj37dada9ccyx3"))))
(build-system r-build-system)
(home-page "http://rqtl.org/")
(synopsis "R package for analyzing QTL experiments in genetics")