diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 313 |
1 files changed, 286 insertions, 27 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index bcb7eba48f..c7836f173e 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -24,6 +24,7 @@ #:use-module (guix git-download) #:use-module (guix build-system gnu) #:use-module (guix build-system cmake) + #:use-module (guix build-system perl) #:use-module (guix build-system python) #:use-module (guix build-system trivial) #:use-module (gnu packages) @@ -45,6 +46,7 @@ #:use-module (gnu packages tbb) #:use-module (gnu packages textutils) #:use-module (gnu packages vim) + #:use-module (gnu packages web) #:use-module (gnu packages xml) #:use-module (gnu packages zip)) @@ -62,7 +64,17 @@ (base32 "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018")))) (build-system cmake-build-system) - (arguments `(#:tests? #f)) ;no "check" target + (arguments + `(#:tests? #f ;no "check" target + #:phases + (modify-phases %standard-phases + (add-before + 'configure 'set-ldflags + (lambda* (#:key outputs #:allow-other-keys) + (setenv "LDFLAGS" + (string-append + "-Wl,-rpath=" + (assoc-ref outputs "out") "/lib/bamtools"))))))) (inputs `(("zlib" ,zlib))) (home-page "https://github.com/pezmaster31/bamtools") (synopsis "C++ API and command-line toolkit for working with BAM data") @@ -532,6 +544,74 @@ file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.") other types of unwanted sequence from high-throughput sequencing reads.") (license license:expat))) +(define-public edirect + (package + (name "edirect") + (version "2.50") + (source (origin + (method url-fetch) + ;; Note: older versions are not retained. + (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.zip") + (sha256 + (base32 + "08afhz2ph66h8h381hl1mqyxkdi5nbvzsyj9gfw3jfbdijnpi4qj")))) + (build-system perl-build-system) + (arguments + `(#:tests? #f ;no "check" target + #:phases + (modify-phases %standard-phases + (delete 'configure) + (delete 'build) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((target (string-append (assoc-ref outputs "out") + "/bin"))) + (mkdir-p target) + (copy-file "edirect.pl" + (string-append target "/edirect.pl")) + #t))) + (add-after + 'install 'wrap-program + (lambda* (#:key inputs outputs #:allow-other-keys) + ;; Make sure 'edirect.pl' finds all perl inputs at runtime. + (let* ((out (assoc-ref outputs "out")) + (path (getenv "PERL5LIB"))) + (wrap-program (string-append out "/bin/edirect.pl") + `("PERL5LIB" ":" prefix (,path))))))))) + (inputs + `(("perl-html-parser" ,perl-html-parser) + ("perl-encode-locale" ,perl-encode-locale) + ("perl-file-listing" ,perl-file-listing) + ("perl-html-tagset" ,perl-html-tagset) + ("perl-html-tree" ,perl-html-tree) + ("perl-http-cookies" ,perl-http-cookies) + ("perl-http-date" ,perl-http-date) + ("perl-http-message" ,perl-http-message) + ("perl-http-negotiate" ,perl-http-negotiate) + ("perl-lwp-mediatypes" ,perl-lwp-mediatypes) + ("perl-lwp-protocol-https" ,perl-lwp-protocol-https) + ("perl-net-http" ,perl-net-http) + ("perl-uri" ,perl-uri) + ("perl-www-robotrules" ,perl-www-robotrules) + ("perl" ,perl))) + (native-inputs + `(("unzip" ,unzip))) + (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288") + (synopsis "Tools for accessing the NCBI's set of databases") + (description + "Entrez Direct (EDirect) is a method for accessing the National Center +for Biotechnology Information's (NCBI) set of interconnected +databases (publication, sequence, structure, gene, variation, expression, +etc.) from a terminal. Functions take search terms from command-line +arguments. Individual operations are combined to build multi-step queries. +Record retrieval and formatting normally complete the process. + +EDirect also provides an argument-driven function that simplifies the +extraction of data from document summaries or other results that are returned +in structured XML format. This can eliminate the need for writing custom +software to answer ad hoc questions.") + (license license:public-domain))) + (define-public express (package (name "express") @@ -839,6 +919,41 @@ sequencing (HTS) data. There are also an number of useful utilities for manipulating HTS data.") (license license:expat))) +(define-public htslib + (package + (name "htslib") + (version "1.2.1") + (source (origin + (method url-fetch) + (uri (string-append + "https://github.com/samtools/htslib/releases/download/" + version "/htslib-" version ".tar.bz2")) + (sha256 + (base32 + "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx")))) + (build-system gnu-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after + 'unpack 'patch-tests + (lambda _ + (substitute* "test/test.pl" + (("/bin/bash") (which "bash"))) + #t))))) + (inputs + `(("zlib" ,zlib))) + (native-inputs + `(("perl" ,perl))) + (home-page "http://www.htslib.org") + (synopsis "C library for reading/writing high-throughput sequencing data") + (description + "HTSlib is a C library for reading/writing high-throughput sequencing +data. It also provides the bgzip, htsfile, and tabix utilities.") + ;; Files under cram/ are released under the modified BSD license; + ;; the rest is released under the Expat license + (license (list license:expat license:bsd-3)))) + (define-public macs (package (name "macs") @@ -1003,6 +1118,84 @@ files and writing bioinformatics applications.") generated using the PacBio Iso-Seq protocol.") (license license:bsd-3)))) +(define-public rsem + (package + (name "rsem") + (version "1.2.20") + (source + (origin + (method url-fetch) + (uri + (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-" + version ".tar.gz")) + (sha256 + (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q")) + (patches (list (search-patch "rsem-makefile.patch"))) + (modules '((guix build utils))) + (snippet + '(begin + ;; remove bundled copy of boost + (delete-file-recursively "boost") + #t)))) + (build-system gnu-build-system) + (arguments + `(#:tests? #f ;no "check" target + #:phases + (modify-phases %standard-phases + ;; No "configure" script. + ;; Do not build bundled samtools library. + (replace 'configure + (lambda _ + (substitute* "Makefile" + (("^all : sam/libbam.a") "all : ")) + #t)) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (string-append (assoc-ref outputs "out"))) + (bin (string-append out "/bin/")) + (perl (string-append out "/lib/perl5/site_perl"))) + (mkdir-p bin) + (mkdir-p perl) + (for-each (lambda (file) + (copy-file file + (string-append bin (basename file)))) + (find-files "." "rsem-.*")) + (copy-file "rsem_perl_utils.pm" + (string-append perl "/rsem_perl_utils.pm"))) + #t)) + (add-after + 'install 'wrap-program + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (for-each (lambda (prog) + (wrap-program (string-append out "/bin/" prog) + `("PERL5LIB" ":" prefix + (,(string-append out "/lib/perl5/site_perl"))))) + '("rsem-plot-transcript-wiggles" + "rsem-calculate-expression" + "rsem-generate-ngvector" + "rsem-run-ebseq" + "rsem-prepare-reference"))) + #t))))) + (inputs + `(("boost" ,boost) + ("ncurses" ,ncurses) + ("r" ,r) + ("perl" ,perl) + ("samtools" ,samtools-0.1) + ("zlib" ,zlib))) + (home-page "http://deweylab.biostat.wisc.edu/rsem/") + (synopsis "Estimate gene expression levels from RNA-Seq data") + (description + "RSEM is a software package for estimating gene and isoform expression +levels from RNA-Seq data. The RSEM package provides a user-friendly +interface, supports threads for parallel computation of the EM algorithm, +single-end and paired-end read data, quality scores, variable-length reads and +RSPD estimation. In addition, it provides posterior mean and 95% credibility +interval estimates for expression levels. For visualization, it can generate +BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.") + (license license:gpl3+))) + (define-public rseqc (package (name "rseqc") @@ -1068,32 +1261,31 @@ distribution, coverage uniformity, strand specificity, etc.") ;; systems. #:tests? ,(string=? (or (%current-system) (%current-target-system)) "x86_64-linux") - #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out"))) + #:make-flags (list "LIBCURSES=-lncurses" + (string-append "prefix=" (assoc-ref %outputs "out"))) #:phases (alist-cons-after 'unpack - 'patch-makefile-curses - (lambda _ - (substitute* "Makefile" - (("-lcurses") "-lncurses"))) + 'patch-tests + (lambda* (#:key inputs #:allow-other-keys) + (let ((bash (assoc-ref inputs "bash"))) + (substitute* "test/test.pl" + ;; The test script calls out to /bin/bash + (("/bin/bash") + (string-append bash "/bin/bash")) + ;; There are two failing tests upstream relating to the "stats" + ;; subcommand in test_usage_subcommand ("did not have Usage" + ;; and "usage did not mention samtools stats"), so we disable + ;; them. + (("(test_usage_subcommand\\(.*\\);)" cmd) + (string-append "unless ($subcommand eq 'stats') {" cmd "};"))))) (alist-cons-after - 'unpack - 'patch-tests - (lambda* (#:key inputs #:allow-other-keys) - (let ((bash (assoc-ref inputs "bash"))) - (substitute* "test/test.pl" - ;; The test script calls out to /bin/bash - (("/bin/bash") - (string-append bash "/bin/bash")) - ;; There are two failing tests upstream relating to the "stats" - ;; subcommand in test_usage_subcommand ("did not have Usage" - ;; and "usage did not mention samtools stats"), so we disable - ;; them. - (("(test_usage_subcommand\\(.*\\);)" cmd) - (string-append "unless ($subcommand eq 'stats') {" cmd "};"))))) - (alist-delete - 'configure - %standard-phases))))) + 'install 'install-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib (string-append (assoc-ref outputs "out") "/lib"))) + (mkdir-p lib) + (copy-file "libbam.a" (string-append lib "/libbam.a")))) + (alist-delete 'configure %standard-phases))))) (native-inputs `(("pkg-config" ,pkg-config))) (inputs `(("ncurses" ,ncurses) ("perl" ,perl) @@ -1108,6 +1300,34 @@ variant calling (in conjunction with bcftools), and a simple alignment viewer.") (license license:expat))) +(define-public samtools-0.1 + ;; This is the most recent version of the 0.1 line of samtools. The input + ;; and output formats differ greatly from that used and produced by samtools + ;; 1.x and is still used in many bioinformatics pipelines. + (package (inherit samtools) + (version "0.1.19") + (source + (origin + (method url-fetch) + (uri + (string-append "mirror://sourceforge/samtools/" + version "/samtools-" version ".tar.bz2")) + (sha256 + (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h")))) + (arguments + (substitute-keyword-arguments (package-arguments samtools) + ((#:tests? tests) #f) ;no "check" target + ((#:phases phases) + `(modify-phases ,phases + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((bin (string-append + (assoc-ref outputs "out") "/bin"))) + (mkdir-p bin) + (copy-file "samtools" + (string-append bin "/samtools"))))) + (delete 'patch-tests))))))) + (define-public ngs-sdk (package (name "ngs-sdk") @@ -1266,11 +1486,16 @@ simultaneously.") (assoc-ref inputs "hdf5")))))) (alist-cons-after 'install 'install-interfaces - (lambda* (#:key system outputs #:allow-other-keys) - ;; Install interface libraries + (lambda* (#:key outputs #:allow-other-keys) + ;; Install interface libraries. On i686 the interface libraries + ;; are installed to "linux/gcc/i386", so we need to use the Linux + ;; architecture name ("i386") instead of the target system prefix + ;; ("i686"). (mkdir (string-append (assoc-ref outputs "out") "/ilib")) (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/" - (car (string-split system #\-)) + ,(system->linux-architecture + (or (%current-target-system) + (%current-system))) "/rel/ilib") (string-append (assoc-ref outputs "out") "/ilib")) @@ -1513,7 +1738,40 @@ against local background noises.") "/sources/shogun-" version ".tar.bz2")) (sha256 (base32 - "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb")))) + "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb")) + (modules '((guix build utils) + (ice-9 rdelim))) + (snippet + '(begin + ;; Remove non-free sources and files referencing them + (for-each delete-file + (find-files "src/shogun/classifier/svm/" + "SVMLight\\.(cpp|h)")) + (for-each delete-file + (find-files "examples/undocumented/libshogun/" + (string-append + "(classifier_.*svmlight.*|" + "evaluation_cross_validation_locked_comparison).cpp"))) + ;; Remove non-free functions. + (define (delete-ifdefs file) + (with-atomic-file-replacement file + (lambda (in out) + (let loop ((line (read-line in 'concat)) + (skipping? #f)) + (if (eof-object? line) + #t + (let ((skip-next? + (or (and skipping? + (not (string-prefix? + "#endif //USE_SVMLIGHT" line))) + (string-prefix? + "#ifdef USE_SVMLIGHT" line)))) + (when (or (not skipping?) + (and skipping? (not skip-next?))) + (display line out)) + (loop (read-line in 'concat) skip-next?))))))) + (for-each delete-ifdefs (find-files "src/shogun/kernel/" + "^Kernel\\.(cpp|h)")))))) (build-system cmake-build-system) (arguments '(#:tests? #f ;no check target @@ -1621,6 +1879,7 @@ in terms of new algorithms.") (arguments `(#:tests? #f ; no "check" target #:make-flags (list + "CFLAGS=-O2" ; override "-m64" flag (string-append "PREFIX=" (assoc-ref %outputs "out")) (string-append "MANDIR=" (assoc-ref %outputs "out") "/share/man/man1")) |