From eb2337982888a1445d1b6067ff6090f08dd0b8ae Mon Sep 17 00:00:00 2001
From: Ricardo Wurmus
Date: Tue, 6 Dec 2022 15:14:46 +0100
Subject: gnu: Add python-cleanlab.

* gnu/packages/machine-learning.scm (python-cleanlab): New variable.
---
 gnu/packages/machine-learning.scm | 46 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index fbc06f96b6..6546a17f5c 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -43,6 +43,7 @@ (define-module (gnu packages machine-learning)
   #:use-module (guix build-system cmake)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system ocaml)
+  #:use-module (guix build-system pyproject)
   #:use-module (guix build-system python)
   #:use-module (guix build-system r)
   #:use-module (guix build-system trivial)
@@ -1283,6 +1284,51 @@ (define-public python-scikit-rebate
 standard feature selection algorithms.")
     (license license:expat)))
 
+(define-public python-cleanlab
+  (package
+    (name "python-cleanlab")
+    (version "2.2.0")
+    ;; The version on pypi does not come with tests.
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/cleanlab/cleanlab")
+                    (commit (string-append "v" version))))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "00dqhxpwg781skknw943ynll2s44g4j125dx8aapk1d5d71sbzqy"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      '(modify-phases %standard-phases
+         (add-after 'unpack 'disable-bad-tests
+           (lambda _
+             ;; XXX This requires pytest lazy_fixture
+             (delete-file "tests/test_multilabel_classification.py")
+             ;; Requires tensorflow
+             (delete-file "tests/test_frameworks.py")
+             ;; Tries to download datasets from the internet at runtime.
+             (delete-file "tests/test_dataset.py"))))))
+    (propagated-inputs
+     (list python-numpy
+           python-pandas
+           python-scikit-learn
+           python-termcolor
+           python-tqdm))
+    (native-inputs
+     (list python-pytest
+           python-pytorch
+           python-torchvision))
+    (home-page "https://cleanlab.ai")
+    (synopsis "Automatically find and fix dataset issues")
+    (description
+     "cleanlab automatically finds and fixes errors in any ML dataset.  This
+data-centric AI package facilitates machine learning with messy, real-world
+data by providing clean labels during training.")
+    (license license:agpl3+)))
+
 (define-public python-cmaes
   (package
     (name "python-cmaes")
-- 
cgit v1.2.3