Loading pkgs/development/python-modules/pytorch-tokenizers/default.nix 0 → 100644 +94 −0 Original line number Diff line number Diff line
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  replaceVars,

  # build-system
  cmake,
  pybind11,
  setuptools,

  # dependencies
  sentencepiece,
  tiktoken,
  tokenizers,

  # tests
  pytestCheckHook,
  transformers,
}:
let
  # pybind11 sources at the tag that upstream's CMakeLists.txt pins.
  # The store path of this fetch is substituted into
  # dont-fetch-pybind11.patch (see `patches` below).
  # https://github.com/meta-pytorch/tokenizers/blob/v1.0.1/CMakeLists.txt#L174-L175
  pybind11-src = fetchFromGitHub {
    owner = "pybind";
    repo = "pybind11";
    tag = "v2.13.6";
    hash = "sha256-SNLdtrOjaC3lGHN9MAqTf51U9EzNKQLyTMNPe0GcdrU=";
  };
in
buildPythonPackage rec {
  pname = "pytorch-tokenizers";
  version = "1.0.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "meta-pytorch";
    repo = "tokenizers";
    tag = "v${version}";
    fetchSubmodules = true;
    hash = "sha256-1BGazimbauNBN/VfLiuhk21VEhbP07GEpPc+GAfKTQY=";
  };

  patches = [
    # The patch replaces the GIT_REPOSITORY/GIT_TAG arguments of
    # FetchContent_Declare(pybind11 ...) with `URL @pybind11@`; replaceVars
    # fills the placeholder with the pre-fetched pybind11-src.
    (replaceVars ./dont-fetch-pybind11.patch {
      pybind11 = pybind11-src;
    })
  ];

  # Drop the `"pip>=23",` and `"pytest",` entries from pyproject.toml.
  # --replace-fail aborts the build if either string is no longer present.
  # NOTE(review): presumably these are listed as build requirements upstream
  # -- confirm against pyproject.toml.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail '"pip>=23",' "" \
      --replace-fail '"pytest",' ""
  '';

  build-system = [
    cmake
    pybind11
    setuptools
  ];
  # cmake is listed above only as a tool; its configure hook is disabled.
  # NOTE(review): presumably the Python build backend invokes CMake itself
  # -- confirm against upstream's setup.py.
  dontUseCmakeConfigure = true;

  dependencies = [
    sentencepiece
    tiktoken
    tokenizers
  ];

  # Checks both the top-level package and its `pytorch_tokenizers_cpp`
  # submodule.
  pythonImportsCheck = [
    "pytorch_tokenizers"
    "pytorch_tokenizers.pytorch_tokenizers_cpp"
  ];

  # Delete the in-tree `pytorch_tokenizers` directory before running tests.
  # NOTE(review): presumably so that pytest imports the installed package
  # rather than the source checkout -- confirm.
  preCheck = ''
    rm -rf pytorch_tokenizers
  '';

  nativeCheckInputs = [
    pytestCheckHook
    transformers
  ];

  disabledTestPaths = [
    # Require downloading models from huggingface
    "test/test_hf_tokenizer.py"
  ];

  meta = {
    description = "C++ implementations for various tokenizers (sentencepiece, tiktoken, etc.)";
    homepage = "https://github.com/meta-pytorch/tokenizers";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ GaetanLepage ];
  };
}
pkgs/development/python-modules/pytorch-tokenizers/dont-fetch-pybind11.patch 0 → 100644 +14 −0 Original line number Diff line number Diff line
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 97f0fe6..8c78f85 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,8 +171,7 @@ if(TOKENIZERS_BUILD_PYTHON)
   include(FetchContent)
   FetchContent_Declare(
     pybind11
-    GIT_REPOSITORY https://github.com/pybind/pybind11.git
-    GIT_TAG v2.13.6
+    URL @pybind11@
   )
   FetchContent_MakeAvailable(pybind11)
 
pkgs/top-level/python-packages.nix +2 −0 Original line number Diff line number Diff line Loading
@@ -15436,6 +15436,8 @@ self: super: with self; {
  pytorch-tabnet = callPackage ../development/python-modules/pytorch-tabnet { };

  pytorch-tokenizers = callPackage ../development/python-modules/pytorch-tokenizers { };

  pytorch3d = callPackage ../development/python-modules/pytorch3d { };

  pytorchviz = callPackage ../development/python-modules/pytorchviz { };
Loading
# pkgs/development/python-modules/pytorch-tokenizers/default.nix (new file, mode 100644, +94 −0)
#
# Package expression for pytorch-tokenizers 1.0.1, built from the
# meta-pytorch/tokenizers GitHub repository (submodules included).
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  replaceVars,

  # build-system
  cmake,
  pybind11,
  setuptools,

  # dependencies
  sentencepiece,
  tiktoken,
  tokenizers,

  # tests
  pytestCheckHook,
  transformers,
}:

let
  # Same pybind11 tag that upstream requests through FetchContent:
  # https://github.com/meta-pytorch/tokenizers/blob/v1.0.1/CMakeLists.txt#L174-L175
  pybind11Source = fetchFromGitHub {
    owner = "pybind";
    repo = "pybind11";
    tag = "v2.13.6";
    hash = "sha256-SNLdtrOjaC3lGHN9MAqTf51U9EzNKQLyTMNPe0GcdrU=";
  };

  # dont-fetch-pybind11.patch with its @pybind11@ placeholder replaced by the
  # pre-fetched sources above, so FetchContent is handed a URL argument
  # instead of a GIT_REPOSITORY/GIT_TAG pair.
  offlinePybind11Patch = replaceVars ./dont-fetch-pybind11.patch {
    pybind11 = pybind11Source;
  };
in
buildPythonPackage rec {
  pname = "pytorch-tokenizers";
  version = "1.0.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "meta-pytorch";
    repo = "tokenizers";
    tag = "v${version}";
    fetchSubmodules = true;
    hash = "sha256-1BGazimbauNBN/VfLiuhk21VEhbP07GEpPc+GAfKTQY=";
  };

  patches = [ offlinePybind11Patch ];

  # Strip the `"pip>=23",` and `"pytest",` entries out of pyproject.toml;
  # --replace-fail makes the build fail loudly if upstream rewords them.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail '"pip>=23",' "" \
      --replace-fail '"pytest",' ""
  '';

  # cmake is needed as a tool only: its configure hook is switched off.
  dontUseCmakeConfigure = true;
  build-system = [
    cmake
    pybind11
    setuptools
  ];

  dependencies = [
    sentencepiece
    tiktoken
    tokenizers
  ];

  nativeCheckInputs = [
    pytestCheckHook
    transformers
  ];

  # Remove the in-tree package directory before the test phase runs.
  preCheck = ''
    rm -rf pytorch_tokenizers
  '';

  disabledTestPaths = [
    # Require downloading models from huggingface
    "test/test_hf_tokenizer.py"
  ];

  # Import both the top-level package and its pytorch_tokenizers_cpp submodule.
  pythonImportsCheck = [
    "pytorch_tokenizers"
    "pytorch_tokenizers.pytorch_tokenizers_cpp"
  ];

  meta = {
    description = "C++ implementations for various tokenizers (sentencepiece, tiktoken, etc.)";
    homepage = "https://github.com/meta-pytorch/tokenizers";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.GaetanLepage ];
  };
}
pkgs/development/python-modules/pytorch-tokenizers/dont-fetch-pybind11.patch 0 → 100644 +14 −0 Original line number Diff line number Diff line
Purpose: inside the if(TOKENIZERS_BUILD_PYTHON) section of CMakeLists.txt,
replace the GIT_REPOSITORY/GIT_TAG arguments of
FetchContent_Declare(pybind11 ...) with a single URL argument.
The URL value is a placeholder token that must be substituted with a
concrete location before this patch is applied.
NOTE(review): the leading whitespace of the context lines below, and the
final blank context line, were reconstructed from the hunk counts
(-171,8 +171,7); verify them against upstream's CMakeLists.txt.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 97f0fe6..8c78f85 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,8 +171,7 @@ if(TOKENIZERS_BUILD_PYTHON)
   include(FetchContent)
   FetchContent_Declare(
     pybind11
-    GIT_REPOSITORY https://github.com/pybind/pybind11.git
-    GIT_TAG v2.13.6
+    URL @pybind11@
   )
   FetchContent_MakeAvailable(pybind11)
 
pkgs/top-level/python-packages.nix +2 −0 Original line number Diff line number Diff line Loading @@ -15436,6 +15436,8 @@ self: super: with self; { pytorch-tabnet = callPackage ../development/python-modules/pytorch-tabnet { }; pytorch-tokenizers = callPackage ../development/python-modules/pytorch-tokenizers { }; pytorch3d = callPackage ../development/python-modules/pytorch3d { }; pytorchviz = callPackage ../development/python-modules/pytorchviz { };