Unverified Commit d0c3e238 authored by Zexin Yuan's avatar Zexin Yuan
Browse files

python3Packages.tree-sitter-language-pack: 0.13.0 -> 1.4.1

parent a8526804
Loading
Loading
Loading
Loading
+131 −57
Original line number Diff line number Diff line
{
  lib,
  buildPackages,
  buildPythonPackage,
  fetchPypi,
  fetchFromGitHub,
  fetchurl,
  python,
  pytestCheckHook,
  nix-update-script,

  # build-system
  cython,
  setuptools,
  typing-extensions,

  # dependencies
  rustPlatform,
  stdenv,
  tree-sitter,
  tree-sitter-c-sharp,
  tree-sitter-embedded-template,
  tree-sitter-yaml,
}:

buildPythonPackage rec {
let
  # Base URL of the upstream GitHub release assets for a given `version`
  # (the release tag is the version prefixed with "v").
  parserReleaseUrl =
    version: "https://github.com/kreuzberg-dev/tree-sitter-language-pack/releases/download/v${version}";

  # Parser archives published with each upstream release, keyed by Nix system
  # name. `suffix` is the platform part of the release asset file name
  # (parsers-<suffix>.tar.zst) and `hash` pins that archive. The set of keys
  # is also what `meta.platforms` is derived from.
  parserBundleSpecs = {
    aarch64-darwin = {
      suffix = "macos-arm64";
      hash = "sha256-pYrgwhb3BkOEqot5JBi26aXBciGt7/zP/1+HcQT2vsw=";
    };
    aarch64-linux = {
      suffix = "linux-aarch64";
      hash = "sha256-t1rWm19iExYAZXluMQqlt9bOkEC2UumcxDov8YmYEEQ=";
    };
    x86_64-linux = {
      suffix = "linux-x86_64";
      hash = "sha256-o4IpLZDitTsHfF2KMnyB3Wry7Hig7Byxd0JLcZPybJ0=";
    };
  };
in
buildPythonPackage (finalAttrs: {
  pname = "tree-sitter-language-pack";
  version = "0.13.0";
  pyproject = true;
  version = "1.4.1";

  # Using the GitHub sources necessitates fetching the treesitter grammar parsers by using a vendored script.
  # The pypi archive has the benefit of already vendoring those dependencies which makes packaging easier on our side
  # See: https://github.com/Goldziher/tree-sitter-language-pack/blob/main/scripts/clone_vendors.py
  src = fetchPypi {
    pname = "tree_sitter_language_pack";
    inherit version;
    hash = "sha256-AyA0xeJ7H24AcwuefC28ggO0cA0MaB/QGdbe/PYRg+w=";
  src = fetchFromGitHub {
    owner = "kreuzberg-dev";
    repo = "tree-sitter-language-pack";
    tag = "v${finalAttrs.version}";
    hash = "sha256-kN2htitEOo+JF6DCrC4RHmHkZXnUA0fUo2jSbMELQHI=";
  };

  # Upstream bumped dependencies aggressively, but we can still use older
  # versions since the newer ones aren’t packaged in nixpkgs. We can't use
  # pythonRelaxDepsHook here because it runs in postBuild, while the dependency
  # check occurs during the build phase.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "typing-extensions>=4.15.0" "typing-extensions>=4.14.1"
  '';
  # Vendored Cargo dependencies, fetched for the same pname/version/src as the
  # package itself and pinned by their own hash.
  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit (finalAttrs)
      pname
      version
      src
      ;
    hash = "sha256-ii3rvAfs4xMSyEEDjUrjL2SAONd0ARCVhwQNCJLwuCk=";
  };

  nativeCheckInputs = [
    pytestCheckHook
  ];
  buildAndTestSubdir = "crates/ts-pack-python";

  build-system = [
    cython
    setuptools
    typing-extensions
  ];
  # Pin the release metadata and per-platform parser archive so runtime use stays offline.
  parserManifest = fetchurl {
    url = "${parserReleaseUrl finalAttrs.version}/parsers.json";
    hash = "sha256-8utASonvrLzOjxZcmRuzuFSGtYe5sEoMU+xz++bfmkk=";
  };

  dependencies = [
    tree-sitter
    tree-sitter-c-sharp
    tree-sitter-embedded-template
    tree-sitter-yaml
  ];
  # Parser archive for the host platform, looked up in parserBundleSpecs.
  # Evaluating this attribute throws for any system without an entry there.
  parserBundle =
    let
      spec =
        parserBundleSpecs.${stdenv.hostPlatform.system}
          or (throw "tree-sitter-language-pack parser bundle is unavailable for ${stdenv.hostPlatform.system}");
    in
    fetchurl {
      url = "${parserReleaseUrl finalAttrs.version}/parsers-${spec.suffix}.tar.zst";
      inherit (spec) hash;
    };

  pythonRelaxDeps = [
    "tree-sitter"
    "tree-sitter-embedded-template"
    "tree-sitter-yaml"
  nativeBuildInputs = [
    buildPackages.zstd
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
  ];

  pythonImportsCheck = [
    "tree_sitter_language_pack"
    "tree_sitter_language_pack.bindings"
  nativeCheckInputs = [ pytestCheckHook ];

  dependencies = [ tree-sitter ];

  disabledTests = [
    # tree-sitter-language-pack 1.4.1 upstream smoke tests expect these aliases
    # to resolve directly in the offline cache, but the packaged bundle still
    # exposes the underlying parser library names.
    "test_get_language_returns_non_none"
    "test_get_parser_for_previously_broken_languages"
    "test_has_language_for_previously_broken"
  ];

  # Make sure we import the built version, not the source one.
  preCheck = ''
    rm -r tree_sitter_language_pack
    # Mirror the upstream cache layout: libs live in cache_dir, while the manifest
    # is expected at cache_dir/../manifest.json.
    cacheRoot=$PWD/.tree-sitter-language-pack-cache
    cacheDir="$cacheRoot/libs"
    mkdir -p "$cacheDir"
    cp ${finalAttrs.parserManifest} "$cacheRoot/manifest.json"
    ${lib.getExe buildPackages.zstd} -d -c ${finalAttrs.parserBundle} | tar -xvf - -C "$cacheDir" >/dev/null

    # Upstream smoke tests call download APIs even when the parsers are already
    # available locally, so point them at the pre-fetched cache and short-circuit
    # redundant network downloads during pytest.
    cat > conftest.py <<EOF
    import json
    from pathlib import Path

    import tree_sitter_language_pack as tslp

    _cache_dir = Path(r"$cacheDir")
    _manifest_path = _cache_dir.parent / "manifest.json"

    tslp.configure(cache_dir=str(_cache_dir))

    def _manifest_languages():
        return sorted(json.loads(_manifest_path.read_text())["languages"].keys())

    def _download(names):
        return 0

    def _download_all():
        return 0

    tslp.manifest_languages = _manifest_languages
    tslp.download = _download
    tslp.download_all = _download_all
    EOF
  '';

  passthru.updateScript = nix-update-script { };
  # Restrict pytest to the Python e2e suite and the Python smoke test.
  # `enabledTestPaths` is pytestCheckHook's dedicated attribute for positional
  # test paths; it replaces the deprecated `pytestFlagsArray`.
  enabledTestPaths = [
    "e2e/python/tests"
    "tests/test_apps/python/smoke_test.py"
  ];

  # Ship the pinned manifest and parser archive inside the output, using the
  # same layout as the test cache set up in preCheck (libraries in <root>/libs,
  # manifest.json next to that directory). The installed __init__.py is then
  # patched so that a configure(cache_dir=...) call pointing at that directory
  # is inserted right before the `SupportedLanguage` alias.
  postInstall = ''
    cacheRoot=$out/share/tree-sitter-language-pack
    cacheDir="$cacheRoot/libs"
    mkdir -p "$cacheDir"
    cp ${finalAttrs.parserManifest} "$cacheRoot/manifest.json"
    ${lib.getExe buildPackages.zstd} -d -c ${finalAttrs.parserBundle} | tar -xf - -C "$cacheDir"

    # Make the installed package default to the pre-fetched cache in $out.
    substituteInPlace $out/${python.sitePackages}/tree_sitter_language_pack/__init__.py \
      --replace-fail 'SupportedLanguage: TypeAlias = str' $'configure(cache_dir="'$cacheDir$'")\n\nSupportedLanguage: TypeAlias = str'
  '';

  pythonImportsCheck = [ "tree_sitter_language_pack" ];

  passthru.updateScript = ./update.sh;

  meta = {
    description = "Comprehensive collection of tree-sitter languages";
    homepage = "https://github.com/Goldziher/tree-sitter-language-pack";
    changelog = "https://github.com/Goldziher/tree-sitter-language-pack/releases/tag/v${version}";
    description = "Comprehensive collection of tree-sitter language parsers with polyglot bindings";
    homepage = "https://github.com/kreuzberg-dev/tree-sitter-language-pack";
    changelog = "https://github.com/kreuzberg-dev/tree-sitter-language-pack/releases/tag/v${finalAttrs.version}";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ yzx9 ];
    platforms = builtins.attrNames parserBundleSpecs;
  };
}
})