Commit e3cdb1ab authored by Gaetan Lepage
Browse files
parent 573c650e
Loading
Loading
Loading
Loading
+18 −11
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
  pytestCheckHook,
  requests,
  tiktoken,
  writableTmpDirAsHomeHook,
}:

let
@@ -70,16 +71,24 @@ let
in
buildPythonPackage rec {
  pname = "tokenizers";
  version = "0.21.0";
  version = "0.21.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "huggingface";
    repo = "tokenizers";
    tag = "v${version}";
    hash = "sha256-G65XiVlvJXOC9zqcVr9vWamUnpC0aa4kyYkE2v1K2iY=";
    hash = "sha256-3S7ZCaZnnwyNjoZ4Y/q3ngQE2MIm2iyCCjYAkdMVG2A=";
  };

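  # TestUnigram.test_continuing_prefix_trainer_mismatch fails with:
  #   Exception: No such file or directory (os error 2)
  # tests/bindings/test_trainers.py refers to fixtures under "data/", whereas
  # the test data is linked into "tests/data/" (see postUnpack), so the paths
  # are rewritten below.
  # Fix submitted upstream: https://github.com/huggingface/tokenizers/pull/1747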
  postPatch = ''
    substituteInPlace tests/bindings/test_trainers.py \
      --replace-fail '"data/' '"tests/data/'
  '';

  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit
      pname
@@ -87,7 +96,7 @@ buildPythonPackage rec {
      src
      sourceRoot
      ;
    hash = "sha256-jj5nuwxlfJm1ugYd5zW+wjyczOZHWCmRGYpmiMDqFlk=";
    hash = "sha256-I7LlBmeVY2rWI0ta6x311iAurQKuutsClrbUgkt9xWk=";
  };

  sourceRoot = "${src.name}/bindings/python";
@@ -115,18 +124,16 @@ buildPythonPackage rec {
    pytestCheckHook
    requests
    tiktoken
    writableTmpDirAsHomeHook
  ];

  postUnpack = ''
  postUnpack =
    # Add data files for tests, otherwise tests attempt network access
    ''
      mkdir $sourceRoot/tests/data
      ln -s ${test-data}/* $sourceRoot/tests/data/
    '';

  preCheck = ''
    export HOME=$(mktemp -d);
  '';

  pythonImportsCheck = [ "tokenizers" ];

  disabledTests = [