diff options
author | Ricardo Wurmus <rekado@elephly.net> | 2024-01-02 22:46:00 +0100 |
---|---|---|
committer | Ricardo Wurmus <rekado@elephly.net> | 2024-01-02 22:47:07 +0100 |
commit | f4628000024219bf373922ff4a6fa752eb821797 (patch) | |
tree | 76b30b514934ea9bd53da4ac83e5a9a98a54b573 /gnu | |
parent | 8ed9ffedd4b7a01fe1ecb73e75931d2ccfcd4923 (diff) | |
download | guix-f4628000024219bf373922ff4a6fa752eb821797.tar guix-f4628000024219bf373922ff4a6fa752eb821797.tar.gz |
gnu: Add cpat.
* gnu/packages/bioinformatics.scm (cpat): New variable.
Change-Id: I7b3acca1bdec2610d7cdaaf6f96440fe000421dd
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index bdad03b000..41cac296fe 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -617,6 +617,50 @@ Compared to cellSNP, this package is more efficient with higher speed and less memory usage.") (license license:asl2.0)))) +(define-public cpat + (package + (name "cpat") + (version "3.0.4") + (source (origin + (method url-fetch) + (uri (pypi-uri "CPAT" version)) + (sha256 + (base32 + "0dfrwwbhv1n4nh2a903d1qfb30fgxgya89sa70aci3wzf8h2z0vd")) + (modules '((guix build utils))) + (snippet + '(for-each delete-file-recursively + (list ".eggs" + "lib/__pycache__/" + "lib/cpmodule/__pycache__/"))))) + (build-system pyproject-build-system) + (arguments + (list + #:phases + '(modify-phases %standard-phases + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (with-directory-excursion "test" + ;; There is no test4.fa + (substitute* "test.sh" + ((".*-g test4.fa.*") "")) + (invoke "bash" "test.sh")))))))) + (propagated-inputs + (list python-numpy python-pysam)) + (inputs + (list r-minimal)) + (home-page "https://wlcb.oit.uci.edu/cpat/") + (synopsis "Alignment-free distinction between coding and noncoding RNA") + (description + "CPAT is a method to distinguish coding and noncoding RNA by using a +logistic regression model based on four pure sequence-based, linguistic +features: ORF size, ORF coverage, Fickett TESTCODE, and hexamer usage bias. +This linguistic-feature-based method does not require other genomes or protein +databases to perform alignment and is more robust. Because it is +alignment-free, it runs much faster and is also easier to use.") + (license license:gpl2+))) + (define-public pbcopper (package (name "pbcopper") |