Commit 95b84e94 authored by Daniël de Kok
Browse files

python3Packages.spacy-models: fix model builds

Fix various issues with building spaCy models:

- Transformer models now use `spacy-curated-transformers`.
- `sl_core_news_trf` requires `protobuf` and `sentencepiece`.
- `ja` models require `sudachipy` and `sudachidict-core`.
- Loosen the `spacy-pkuseg` requirement for `zh` models (0.3 -> 1.0 does
  not have API changes).
parent a1accfb8
Loading
Loading
Loading
Loading
+28 −8
Original line number Diff line number Diff line
@@ -9,7 +9,10 @@
  setuptools,
  spacy,
  spacy-pkuseg,
  spacy-transformers,
  spacy-curated-transformers,
  sudachipy,
  sudachidict-core,
  transformers,
  writeScript,
  stdenv,
  jq,
@@ -27,7 +30,10 @@ let

    let
      lang = builtins.substring 0 2 pname;
      requires-protobuf = pname == "fr_dep_news_trf" || pname == "uk_core_news_trf";
      requires-protobuf =
        pname == "fr_dep_news_trf" || pname == "sl_core_news_trf" || pname == "uk_core_news_trf";
      requires-sentencepiece = pname == "fr_dep_news_trf" || pname == "sl_core_news_trf";
      requires-transformers = pname == "uk_core_news_trf";
    in
    buildPythonPackage {
      inherit pname version;
@@ -40,18 +46,32 @@ let

      propagatedBuildInputs =
        [ spacy ]
        ++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-transformers ]
        ++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-curated-transformers ]
        ++ lib.optionals requires-transformers [ transformers ]
        ++ lib.optionals (lang == "ja") [
          sudachidict-core
          sudachipy
        ]
        ++ lib.optionals (lang == "ru") [ pymorphy3 ]
        ++ lib.optionals (lang == "uk") [
          pymorphy3
          pymorphy3-dicts-uk
        ]
        ++ lib.optionals (lang == "zh") [ spacy-pkuseg ]
        ++ lib.optionals (pname == "fr_dep_news_trf") [ sentencepiece ];
        ++ lib.optionals requires-sentencepiece [ sentencepiece ];

      postPatch = lib.optionalString requires-protobuf ''
      postPatch =
        lib.optionalString requires-protobuf ''
          substituteInPlace meta.json \
            --replace-fail "protobuf<3.21.0" "protobuf"
        ''
        + lib.optionalString (lang == "zh") ''
          # Uses numpy 2.x, while the rest of the dependencies still uses
          # numpy 1.x. Remove once all spaCy packages are updated for
          # numpy 2.x.
          cat meta.json
          substituteInPlace meta.json \
          --replace "protobuf<3.21.0" "protobuf"
          --replace-fail "spacy-pkuseg>=1.0.0,<2.0.0" "spacy-pkuseg"
        '';

      nativeBuildInputs = [ setuptools ] ++ lib.optionals requires-protobuf [ protobuf ];