# pkgs/development/python-modules/affinegap/default.nix (new file, mode 100644, +50 −0)
#
# Packages dedupeio/affinegap for the Python package set.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  setuptools,

  # tests
  pytestCheckHook,
}:

let
  # Bound outside the attribute set so the Git tag below can be derived from it.
  version = "1.12";
in
buildPythonPackage {
  pname = "affinegap";
  inherit version;
  pyproject = true;

  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "affinegap";
    tag = "v${version}";
    hash = "sha256-9eX41eoME5Vdtq+c04eQbMYnViy6QKOhKkafrkeMylI=";
  };

  build-system = [
    cython
    setuptools
  ];

  pythonImportsCheck = [ "affinegap" ];

  nativeCheckInputs = [ pytestCheckHook ];

  # Delete the in-tree package before the tests run: only the copy installed in
  # $out has the compiled extensions, so test collection must not import from
  # the source tree.
  preCheck = ''
    rm -rf affinegap
  '';

  meta = {
    description = "Cython implementation of the affine gap string distance";
    homepage = "https://github.com/dedupeio/affinegap";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.daniel-fahey ];
  };
}

# pkgs/development/python-modules/categorical-distance/default.nix (new file, mode 100644, +53 −0)
#
# Packages dedupeio/categorical-distance, pinned to a fixed upstream commit.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  unstableGitUpdater,

  # build-system
  setuptools,

  # dependencies
  numpy,

  # tests
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "categorical-distance";
  version = "1.9-unstable-2020-03-31";
  pyproject = true;

  # Fetched by commit hash rather than by tag.
  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "categorical-distance";
    rev = "07d079fd412ccf06cdb200b3cd2cfa4b67f78722";
    hash = "sha256-zSjSrlFiRus/T2XZdakLQpF1u/LV0VNWwrc8lhss6kU=";
  };

  build-system = [ setuptools ];

  dependencies = [ numpy ];

  # The importable module is named "categorical", unlike the package name.
  pythonImportsCheck = [ "categorical" ];

  nativeCheckInputs = [ pytestCheckHook ];

  passthru = {
    updateScript = unstableGitUpdater { };
  };

  meta = {
    description = "Compare similarity of categorical variables using Jaccard index";
    homepage = "https://github.com/dedupeio/categorical-distance";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.daniel-fahey ];
  };
}

# pkgs/development/python-modules/dedupe-levenshtein-search/default.nix (new file, mode 100644, +45 −0)
#
# Packages dedupeio/Levenshtein_search under the name dedupe-levenshtein-search.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # tests
  pytestCheckHook,
}:

let
  # Bound outside the attribute set so the Git tag below can be derived from it.
  version = "1.4.5";
in
buildPythonPackage {
  pname = "dedupe-levenshtein-search";
  inherit version;
  pyproject = true;

  # NOTE: The source is dedupeio's fork of mattandahalfew/Levenshtein_search,
  # which was created for MIT licensing.
  # TODO: Evaluate whether the upstream version could be used instead.
  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "Levenshtein_search";
    tag = "v${version}";
    hash = "sha256-YhsZA28H4OUkQEBtJ+9OXJld4Z/PJbOPqAQQ9qaXSjk=";
  };

  build-system = [ setuptools ];

  # The importable module keeps the fork's original name.
  pythonImportsCheck = [ "Levenshtein_search" ];

  nativeCheckInputs = [ pytestCheckHook ];

  meta = {
    description = "Search through documents for approximately matching strings using Levenshtein distance";
    homepage = "https://github.com/dedupeio/Levenshtein_search";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.daniel-fahey ];
  };
}

# pkgs/development/python-modules/dedupe-pylbfgs/default.nix (new file, mode 100644, +59 −0)
#
# Packages dedupeio/pylbfgs under the name dedupe-pylbfgs.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  numpy,
  setuptools,

  # tests
  pytest-cov-stub,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "dedupe-pylbfgs";
  version = "0.2.0.16";
  pyproject = true;

  # NOTE: This is a fork of larsmans/pylbfgs maintained by dedupeio
  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "pylbfgs";
    # Upstream tags are the bare version number (no "v" prefix), so the version
    # string is used directly instead of being wrapped in an interpolation.
    tag = version;
    hash = "sha256-H416dgZQxyqsnhmlK5keW8cJWY6gea4mebVuP0IEVOU=";
  };

  # numpy is listed both here and in `dependencies`; presumably it is needed
  # while compiling the extension as well as at import time — TODO confirm
  # against upstream's build configuration.
  build-system = [
    cython
    numpy
    setuptools
  ];

  dependencies = [ numpy ];

  nativeCheckInputs = [
    pytest-cov-stub
    pytestCheckHook
  ];

  # Prevent importing from source during test collection (only $out has compiled extensions)
  preCheck = ''
    rm -rf lbfgs
  '';

  # The importable module is named "lbfgs", unlike the package name.
  pythonImportsCheck = [ "lbfgs" ];

  meta = {
    description = "Python wrapper for L-BFGS and OWL-QN optimization algorithms";
    homepage = "https://github.com/dedupeio/pylbfgs";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ daniel-fahey ];
  };
}

# pkgs/development/python-modules/dedupe/default.nix (new file, mode 100644, +106 −0)
#
# Packages dedupeio/dedupe and exposes upstream's canonical benchmarks as a
# passthru test.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  setuptools,

  # dependencies
  affinegap,
  btrees,
  categorical-distance,
  dedupe-levenshtein-search,
  doublemetaphone,
  haversine,
  highered,
  numpy,
  scikit-learn,
  simplecosine,
  zope-index,

  # tests
  # `dedupe` is this package itself; it is only used to build the Python
  # environment for passthru.tests.benchmarks.
  dedupe,
  pytest-cov-stub,
  pytestCheckHook,
  python,
  runCommand,
}:

buildPythonPackage rec {
  pname = "dedupe";
  version = "3.0.3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "dedupe";
    tag = "v${version}";
    hash = "sha256-tfBJeaeZw5w5OwM+AOfy9H6P2zbShjN/kuzEbpxATHI=";
  };

  build-system = [
    cython
    setuptools
  ];

  dependencies = [
    affinegap
    btrees
    categorical-distance
    dedupe-levenshtein-search
    doublemetaphone
    haversine
    highered
    numpy
    scikit-learn
    simplecosine
    zope-index
  ];

  nativeCheckInputs = [
    pytest-cov-stub
    pytestCheckHook
  ];

  # Remove source directory so pytest imports compiled extension from $out
  preCheck = ''
    rm -rf dedupe
  '';

  pythonImportsCheck = [ "dedupe" ];

  # Runs upstream's canonical benchmarks against the built package. The
  # derivation succeeds (creates $out) only if all of them exit successfully.
  passthru.tests.benchmarks =
    runCommand "dedupe-benchmarks-test"
      {
        nativeBuildInputs = [ (python.withPackages (ps: [ dedupe ])) ];
      }
      ''
        # Copy benchmarks to writable location
        cp -r ${src}/benchmarks benchmarks
        chmod -R +w benchmarks
        cd benchmarks

        # Run all three canonical benchmarks
        for benchmark in canonical canonical_gazetteer canonical_matching; do
          echo "Running $benchmark benchmark..."
          # stderr is captured to a file rather than shown, to keep Python 3.13
          # multiprocessing resource tracker warnings from scikit-learn/joblib
          # subprocesses out of the log. Unlike discarding it, this lets the
          # output be replayed when a benchmark fails, so the traceback is
          # available for debugging. Fail fast on the first failing benchmark.
          if ! PYTHONPATH=$PWD python -m benchmarks.$benchmark 2>"$benchmark.stderr"; then
            echo "$benchmark benchmark failed; captured stderr follows:" >&2
            cat "$benchmark.stderr" >&2
            exit 1
          fi
        done

        touch $out
      '';

  meta = {
    description = "Library for accurate and scalable fuzzy matching, deduplication and entity resolution";
    homepage = "https://github.com/dedupeio/dedupe";
    changelog = "https://github.com/dedupeio/dedupe/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ daniel-fahey ];
  };
}
# pkgs/development/python-modules/affinegap/default.nix (new file, mode 100644, +50 −0)
#
# Packages dedupeio/affinegap for the Python package set.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  setuptools,

  # tests
  pytestCheckHook,
}:

let
  # Bound outside the attribute set so the Git tag below can be derived from it.
  version = "1.12";
in
buildPythonPackage {
  pname = "affinegap";
  inherit version;
  pyproject = true;

  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "affinegap";
    tag = "v${version}";
    hash = "sha256-9eX41eoME5Vdtq+c04eQbMYnViy6QKOhKkafrkeMylI=";
  };

  build-system = [
    cython
    setuptools
  ];

  pythonImportsCheck = [ "affinegap" ];

  nativeCheckInputs = [ pytestCheckHook ];

  # Delete the in-tree package before the tests run: only the copy installed in
  # $out has the compiled extensions, so test collection must not import from
  # the source tree.
  preCheck = ''
    rm -rf affinegap
  '';

  meta = {
    description = "Cython implementation of the affine gap string distance";
    homepage = "https://github.com/dedupeio/affinegap";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.daniel-fahey ];
  };
}
# pkgs/development/python-modules/categorical-distance/default.nix (new file, mode 100644, +53 −0)
#
# Packages dedupeio/categorical-distance, pinned to a fixed upstream commit.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  unstableGitUpdater,

  # build-system
  setuptools,

  # dependencies
  numpy,

  # tests
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "categorical-distance";
  version = "1.9-unstable-2020-03-31";
  pyproject = true;

  # Fetched by commit hash rather than by tag.
  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "categorical-distance";
    rev = "07d079fd412ccf06cdb200b3cd2cfa4b67f78722";
    hash = "sha256-zSjSrlFiRus/T2XZdakLQpF1u/LV0VNWwrc8lhss6kU=";
  };

  build-system = [ setuptools ];

  dependencies = [ numpy ];

  # The importable module is named "categorical", unlike the package name.
  pythonImportsCheck = [ "categorical" ];

  nativeCheckInputs = [ pytestCheckHook ];

  passthru = {
    updateScript = unstableGitUpdater { };
  };

  meta = {
    description = "Compare similarity of categorical variables using Jaccard index";
    homepage = "https://github.com/dedupeio/categorical-distance";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.daniel-fahey ];
  };
}
# pkgs/development/python-modules/dedupe-levenshtein-search/default.nix (new file, mode 100644, +45 −0)
#
# Packages dedupeio/Levenshtein_search under the name dedupe-levenshtein-search.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # tests
  pytestCheckHook,
}:

let
  # Bound outside the attribute set so the Git tag below can be derived from it.
  version = "1.4.5";
in
buildPythonPackage {
  pname = "dedupe-levenshtein-search";
  inherit version;
  pyproject = true;

  # NOTE: The source is dedupeio's fork of mattandahalfew/Levenshtein_search,
  # which was created for MIT licensing.
  # TODO: Evaluate whether the upstream version could be used instead.
  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "Levenshtein_search";
    tag = "v${version}";
    hash = "sha256-YhsZA28H4OUkQEBtJ+9OXJld4Z/PJbOPqAQQ9qaXSjk=";
  };

  build-system = [ setuptools ];

  # The importable module keeps the fork's original name.
  pythonImportsCheck = [ "Levenshtein_search" ];

  nativeCheckInputs = [ pytestCheckHook ];

  meta = {
    description = "Search through documents for approximately matching strings using Levenshtein distance";
    homepage = "https://github.com/dedupeio/Levenshtein_search";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.daniel-fahey ];
  };
}
# pkgs/development/python-modules/dedupe-pylbfgs/default.nix (new file, mode 100644, +59 −0)
#
# Packages dedupeio/pylbfgs under the name dedupe-pylbfgs.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  numpy,
  setuptools,

  # tests
  pytest-cov-stub,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "dedupe-pylbfgs";
  version = "0.2.0.16";
  pyproject = true;

  # NOTE: This is a fork of larsmans/pylbfgs maintained by dedupeio
  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "pylbfgs";
    # Upstream tags are the bare version number (no "v" prefix), so the version
    # string is used directly instead of being wrapped in an interpolation.
    tag = version;
    hash = "sha256-H416dgZQxyqsnhmlK5keW8cJWY6gea4mebVuP0IEVOU=";
  };

  # numpy is listed both here and in `dependencies`; presumably it is needed
  # while compiling the extension as well as at import time — TODO confirm
  # against upstream's build configuration.
  build-system = [
    cython
    numpy
    setuptools
  ];

  dependencies = [ numpy ];

  nativeCheckInputs = [
    pytest-cov-stub
    pytestCheckHook
  ];

  # Prevent importing from source during test collection (only $out has compiled extensions)
  preCheck = ''
    rm -rf lbfgs
  '';

  # The importable module is named "lbfgs", unlike the package name.
  pythonImportsCheck = [ "lbfgs" ];

  meta = {
    description = "Python wrapper for L-BFGS and OWL-QN optimization algorithms";
    homepage = "https://github.com/dedupeio/pylbfgs";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ daniel-fahey ];
  };
}
# pkgs/development/python-modules/dedupe/default.nix (new file, mode 100644, +106 −0)
#
# Packages dedupeio/dedupe and exposes upstream's canonical benchmarks as a
# passthru test.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  setuptools,

  # dependencies
  affinegap,
  btrees,
  categorical-distance,
  dedupe-levenshtein-search,
  doublemetaphone,
  haversine,
  highered,
  numpy,
  scikit-learn,
  simplecosine,
  zope-index,

  # tests
  # `dedupe` is this package itself; it is only used to build the Python
  # environment for passthru.tests.benchmarks.
  dedupe,
  pytest-cov-stub,
  pytestCheckHook,
  python,
  runCommand,
}:

buildPythonPackage rec {
  pname = "dedupe";
  version = "3.0.3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "dedupe";
    tag = "v${version}";
    hash = "sha256-tfBJeaeZw5w5OwM+AOfy9H6P2zbShjN/kuzEbpxATHI=";
  };

  build-system = [
    cython
    setuptools
  ];

  dependencies = [
    affinegap
    btrees
    categorical-distance
    dedupe-levenshtein-search
    doublemetaphone
    haversine
    highered
    numpy
    scikit-learn
    simplecosine
    zope-index
  ];

  nativeCheckInputs = [
    pytest-cov-stub
    pytestCheckHook
  ];

  # Remove source directory so pytest imports compiled extension from $out
  preCheck = ''
    rm -rf dedupe
  '';

  pythonImportsCheck = [ "dedupe" ];

  # Runs upstream's canonical benchmarks against the built package. The
  # derivation succeeds (creates $out) only if all of them exit successfully.
  passthru.tests.benchmarks =
    runCommand "dedupe-benchmarks-test"
      {
        nativeBuildInputs = [ (python.withPackages (ps: [ dedupe ])) ];
      }
      ''
        # Copy benchmarks to writable location
        cp -r ${src}/benchmarks benchmarks
        chmod -R +w benchmarks
        cd benchmarks

        # Run all three canonical benchmarks
        for benchmark in canonical canonical_gazetteer canonical_matching; do
          echo "Running $benchmark benchmark..."
          # stderr is captured to a file rather than shown, to keep Python 3.13
          # multiprocessing resource tracker warnings from scikit-learn/joblib
          # subprocesses out of the log. Unlike discarding it, this lets the
          # output be replayed when a benchmark fails, so the traceback is
          # available for debugging. Fail fast on the first failing benchmark.
          if ! PYTHONPATH=$PWD python -m benchmarks.$benchmark 2>"$benchmark.stderr"; then
            echo "$benchmark benchmark failed; captured stderr follows:" >&2
            cat "$benchmark.stderr" >&2
            exit 1
          fi
        done

        touch $out
      '';

  meta = {
    description = "Library for accurate and scalable fuzzy matching, deduplication and entity resolution";
    homepage = "https://github.com/dedupeio/dedupe";
    changelog = "https://github.com/dedupeio/dedupe/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ daniel-fahey ];
  };
}