Unverified Commit d52d960d authored by Sandro Jäckel's avatar Sandro Jäckel Committed by GitHub
Browse files

python3Packages.spacy-models: fix model builds (#356705)

parents d81bb93c c019ccc8
Loading
Loading
Loading
Loading
+56 −0
Original line number Diff line number Diff line
# Package expression for curated-tokenizers, built from the upstream
# GitHub repository (including its git submodules) as a pyproject build
# with cython and setuptools as the build system.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cython,
  setuptools,
  regex,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "curated-tokenizers";
  version = "0.0.9";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "explosion";
    repo = "curated-tokenizers";
    tag = "v${version}";
    fetchSubmodules = true;
    hash = "sha256-P8kpPnaU3el7sc/vUn4waQN+JV7F9b49i6BtC4BFfIg=";
  };

  build-system = [
    cython
    setuptools
  ];

  dependencies = [ regex ];

  pythonImportsCheck = [ "curated_tokenizers" ];

  nativeCheckInputs = [ pytestCheckHook ];

  # The test suite is moved to ./tests and the in-tree package directory is
  # removed before the check phase (see the note inside the script).
  preCheck = ''
    # avoid local paths, relative imports wont resolve correctly
    mv curated_tokenizers/tests tests
    rm -r curated_tokenizers
  '';

  # Restrict pytest to the relocated ./tests directory so the vendored
  # sentencepiece tests are not collected.
  pytestFlagsArray = [ "tests" ];

  meta = {
    description = "Lightweight piece tokenization library";
    homepage = "https://github.com/explosion/curated-tokenizers";
    changelog = "https://github.com/explosion/curated-tokenizers/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.danieldk ];
  };
}
+36 −0
Original line number Diff line number Diff line
# Package expression for curated-transformers, built from the upstream
# GitHub repository as a pyproject build with setuptools.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools,
  torch,
}:

buildPythonPackage rec {
  pname = "curated-transformers";
  version = "0.1.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "explosion";
    repo = "curated-transformers";
    tag = "v${version}";
    hash = "sha256-QhJZnQIa9TilwdQCUlxnQCEc6Suj669cht6WHUAr/Gw=";
  };

  build-system = [ setuptools ];

  dependencies = [ torch ];

  # Unit tests are hard to use, since most tests rely on downloading
  # models from Hugging Face Hub. An import check is used as a smoke test
  # instead. The attribute must be spelled `pythonImportsCheck` (plural);
  # the previous `pythonImportCheck` spelling meant the check never ran.
  pythonImportsCheck = [ "curated_transformers" ];

  meta = with lib; {
    description = "PyTorch library of curated Transformer models and their composable components";
    homepage = "https://github.com/explosion/curated-transformers";
    changelog = "https://github.com/explosion/curated-transformers/releases/tag/v${version}";
    license = licenses.mit;
    maintainers = with maintainers; [ danieldk ];
  };
}
+44 −0
Original line number Diff line number Diff line
# Package expression for spacy-curated-transformers, built from the
# upstream GitHub repository as a pyproject build with setuptools.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools,
  curated-tokenizers,
  curated-transformers,
  spacy,
  torch,
}:

buildPythonPackage rec {
  pname = "spacy-curated-transformers";
  version = "0.3.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "explosion";
    repo = "spacy-curated-transformers";
    # Upstream tags for this repository carry a "release-" prefix.
    tag = "release-v${version}";
    hash = "sha256-3LL0ofVsyacMzLJtttg0Tl9SlkPex7TwWL/HVF4WkfI=";
  };

  build-system = [ setuptools ];

  dependencies = [
    curated-tokenizers
    curated-transformers
    spacy
    torch
  ];

  # Unit tests are hard to use, since most tests rely on downloading
  # models from Hugging Face Hub. An import check is used as a smoke test
  # instead. The attribute must be spelled `pythonImportsCheck` (plural);
  # the previous `pythonImportCheck` spelling meant the check never ran.
  pythonImportsCheck = [ "spacy_curated_transformers" ];

  meta = with lib; {
    description = "spaCy entry points for Curated Transformers";
    homepage = "https://github.com/explosion/spacy-curated-transformers";
    changelog = "https://github.com/explosion/spacy-curated-transformers/releases/tag/v${version}";
    license = licenses.mit;
    maintainers = with maintainers; [ danieldk ];
  };
}
+27 −8
Original line number Diff line number Diff line
@@ -9,7 +9,10 @@
  setuptools,
  spacy,
  spacy-pkuseg,
  spacy-transformers,
  spacy-curated-transformers,
  sudachipy,
  sudachidict-core,
  transformers,
  writeScript,
  stdenv,
  jq,
@@ -27,7 +30,10 @@ let

    let
      lang = builtins.substring 0 2 pname;
      requires-protobuf = pname == "fr_dep_news_trf" || pname == "uk_core_news_trf";
      requires-protobuf =
        pname == "fr_dep_news_trf" || pname == "sl_core_news_trf" || pname == "uk_core_news_trf";
      requires-sentencepiece = pname == "fr_dep_news_trf" || pname == "sl_core_news_trf";
      requires-transformers = pname == "uk_core_news_trf";
    in
    buildPythonPackage {
      inherit pname version;
@@ -40,18 +46,31 @@ let

      propagatedBuildInputs =
        [ spacy ]
        ++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-transformers ]
        ++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-curated-transformers ]
        ++ lib.optionals requires-transformers [ transformers ]
        ++ lib.optionals (lang == "ja") [
          sudachidict-core
          sudachipy
        ]
        ++ lib.optionals (lang == "ru") [ pymorphy3 ]
        ++ lib.optionals (lang == "uk") [
          pymorphy3
          pymorphy3-dicts-uk
        ]
        ++ lib.optionals (lang == "zh") [ spacy-pkuseg ]
        ++ lib.optionals (pname == "fr_dep_news_trf") [ sentencepiece ];
        ++ lib.optionals requires-sentencepiece [ sentencepiece ];

      postPatch = lib.optionalString requires-protobuf ''
      postPatch =
        lib.optionalString requires-protobuf ''
          substituteInPlace meta.json \
            --replace-fail "protobuf<3.21.0" "protobuf"
        ''
        + lib.optionalString (lang == "zh") ''
          # Uses numpy 2.x, while the rest of the dependencies still uses
          # numpy 1.x. Remove once all spaCy packages are updated for
          # numpy 2.x.
          substituteInPlace meta.json \
          --replace "protobuf<3.21.0" "protobuf"
            --replace-fail "spacy-pkuseg>=1.0.0,<2.0.0" "spacy-pkuseg"
        '';

      nativeBuildInputs = [ setuptools ] ++ lib.optionals requires-protobuf [ protobuf ];
+6 −0
Original line number Diff line number Diff line
@@ -2831,6 +2831,10 @@ self: super: with self; {
  cu2qu = callPackage ../development/python-modules/cu2qu { };
  curated-tokenizers = callPackage ../development/python-modules/curated-tokenizers { };
  curated-transformers = callPackage ../development/python-modules/curated-transformers { };
  customtkinter = callPackage ../development/python-modules/customtkinter { };
  cucumber-tag-expressions = callPackage ../development/python-modules/cucumber-tag-expressions { };
@@ -15327,6 +15331,8 @@ self: super: with self; {
  spacy-alignments = callPackage ../development/python-modules/spacy-alignments { };
  spacy-curated-transformers = callPackage ../development/python-modules/spacy-curated-transformers { };
  spacy-legacy = callPackage ../development/python-modules/spacy/legacy.nix { };
  spacy-loggers = callPackage ../development/python-modules/spacy-loggers { };