Unverified Commit 4a2c6cc5 authored by Yt's avatar Yt Committed by GitHub
Browse files

python3Packages.nltk: add data(Dir) passthru, run tests (#409680)

parents aad53c52 020c1daa
Loading
Loading
Loading
Loading
+4 −10
Original line number Diff line number Diff line
@@ -5,8 +5,6 @@
  python3,
  makeWrapper,
  nix-update-script,
  symlinkJoin,
  nltk-data,
}:
let
  pythonEnv = python3.withPackages (
@@ -147,14 +145,10 @@ let
    ++ unstructured.optional-dependencies.all-docs
  );
  version = "0.0.82";
  unstructured_api_nltk_data = symlinkJoin {
    name = "unstructured_api_nltk_data";

    paths = [
      nltk-data.punkt
      nltk-data.averaged-perceptron-tagger
    ];
  };
  unstructured_api_nltk_data = python3.pkgs.nltk.dataDir (d: [
    d.punkt
    d.averaged-perceptron-tagger
  ]);
in
stdenvNoCC.mkDerivation {
  pname = "unstructured-api";
+4 −9
Original line number Diff line number Diff line
@@ -6,8 +6,6 @@
  gitMinimal,
  portaudio,
  playwright-driver,
  symlinkJoin,
  nltk-data,
  pythonOlder,
  pythonAtLeast,
  setuptools-scm,
@@ -122,13 +120,10 @@
}:

let
  aider-nltk-data = symlinkJoin {
    name = "aider-nltk-data";
    paths = [
      nltk-data.punkt-tab
      nltk-data.stopwords
    ];
  };
  aider-nltk-data = nltk.dataDir (d: [
    d.punkt-tab
    d.stopwords
  ]);

  version = "0.83.1";
  aider-chat = buildPythonPackage {
+15 −0
Original line number Diff line number Diff line
# Builder for a combined NLTK data directory.
#
# Evaluates to a function taking a selector: the selector is handed the
# attribute set `python3Packages.nltk.data` and must return the list of paths
# that `symlinkJoin` merges into one "nltk-data-dir" output. The builder is
# wrapped in `lib.makeOverridable`, with `python3Packages.nltk.data` as its
# argument.
{
  lib,
  pkgs,
  python3Packages,
}:
let
  nltk = python3Packages.nltk;

  # dataPkgs: attribute set of available data packages.
  # select:   function from that set to the list of paths to join.
  mkDataDir =
    { ... }@dataPkgs:
    select:
    pkgs.symlinkJoin {
      name = "nltk-data-dir";
      paths = select dataPkgs;

      # Reuse the metadata of the nltk package itself.
      meta = nltk.meta;
    };
in
lib.makeOverridable mkDataDir nltk.data
+70 −9
Original line number Diff line number Diff line
{
  lib,
  pkgs,
  fetchPypi,
  buildPythonPackage,
  pythonOlder,
@@ -7,6 +8,16 @@
  joblib,
  regex,
  tqdm,

  # preInstallCheck
  nltk,

  # nativeCheckInputs
  matplotlib,
  numpy,
  pyparsing,
  pytestCheckHook,
  pytest-mock,
}:

buildPythonPackage rec {
@@ -21,28 +32,78 @@ buildPythonPackage rec {
    hash = "sha256-h9EnvT3kvYmk+BJl5fpZyxsZmydEAXU3D3QX0rx66Gg=";
  };

  propagatedBuildInputs = [
  dependencies = [
    click
    joblib
    regex
    tqdm
  ];

  # Tests require some data, the downloading of which is impure. It would
  # probably make sense to make the data another derivation, but then feeding
  # that into the tests (given that we need nltk itself to download the data,
  # unless there's an easy way to download it without nltk's downloader) might
  # be complicated. For now let's just disable the tests and hope for the
  # best.
  doCheck = false;
  # Before the install check runs, export NLTK_DATA so that it points at a
  # data directory assembled by the `dataDir` passthru function from the
  # data packages listed below (the data the tests depend on).
  preInstallCheck = ''
    export NLTK_DATA=${
      nltk.dataDir (
        d: with d; [
          averaged-perceptron-tagger-eng
          averaged-perceptron-tagger-rus
          brown
          cess-cat
          cess-esp
          conll2007
          floresta
          gutenberg
          inaugural
          indian
          large-grammars
          nombank-1-0
          omw-1-4
          pl196x
          porter-test
          ptb
          punkt-tab
          rte
          sinica-treebank
          stopwords
          tagsets-json
          treebank
          twitter-samples
          udhr
          universal-tagset
          wmt15-eval
          wordnet
          wordnet-ic
          words
        ]
      )
    }
  '';

  nativeCheckInputs = [
    pytestCheckHook
    matplotlib
    numpy
    pyparsing
    pytest-mock

    pkgs.which
  ];

  disabledTestPaths = [
    "nltk/test/unit/test_downloader.py" # Touches network
  ];

  pythonImportsCheck = [ "nltk" ];

  # Extra attributes exposed on the nltk package.
  passthru = {
    # The `nltk-data` attribute from `pkgs`.
    data = pkgs.nltk-data;
    # Result of calling ./data-dir.nix; used elsewhere as
    # `nltk.dataDir (d: [ d.<name> ... ])` to build a data directory from a
    # selection of data packages.
    dataDir = pkgs.callPackage ./data-dir.nix { };
  };

  meta = with lib; {
    description = "Natural Language Processing ToolKit";
    mainProgram = "nltk";
    homepage = "http://nltk.org/";
    license = licenses.asl20;
    maintainers = [ ];
    maintainers = [ lib.maintainers.bengsparks ];
  };
}
+5 −10
Original line number Diff line number Diff line
@@ -16,18 +16,13 @@
  python-dateutil,
  scipy,
  toml,
  nltk-data,
  symlinkJoin,
}:
let
  testNltkData = symlinkJoin {
    name = "nltk-test-data";
    paths = [
      nltk-data.punkt
      nltk-data.punkt-tab
      nltk-data.stopwords
    ];
  };
  testNltkData = nltk.dataDir (d: [
    d.punkt
    d.punkt-tab
    d.stopwords
  ]);

  version = "0.0.21";
  tag = "v${version}";