Unverified Commit 079adbf2 authored by Yt's avatar Yt Committed by GitHub
Browse files

python3Packages.tree-sitter-language-pack: 0.13.0 -> 1.4.1 (#503590)

parents efde085d e0611dbc
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -37,6 +37,9 @@ let

    pythonRemoveDeps = [
      "importlib-resources"
      "tree-sitter-c-sharp"
      "tree-sitter-embedded-template"
      "tree-sitter-yaml"
    ];

    build-system = with python3Packages; [ setuptools-scm ];
@@ -200,6 +203,10 @@ let
      "test_main_exit_calls_version_check"
      # AssertionError: assert 2 == 1
      "test_simple_send_non_retryable_error"
      # Upstream tests incompatible with current litellm version
      "test_max_context_tokens"
      "test_cmd_tokens_output"
      "test_cmd_read_only_with_image_file"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      # Tests fails on darwin
+3 −0
Original line number Diff line number Diff line
@@ -206,6 +206,9 @@ python.pkgs.buildPythonApplication rec {
    "test_get_reranker"
    "test_query_tool_success"
    "test_supported_rerankers_initialization"
    # tree-sitter-language-pack 1.x.x raises LanguageNotFoundError for unknown
    # languages here, while this test still expects LookupError.
    "test_treesitter_chunker_parser_from_config_no_parser_found_error"
  ];

  passthru = {
+131 −57
Original line number Diff line number Diff line
{
  lib,
  buildPackages,
  buildPythonPackage,
  fetchPypi,
  fetchFromGitHub,
  fetchurl,
  python,
  pytestCheckHook,
  nix-update-script,

  # build-system
  cython,
  setuptools,
  typing-extensions,

  # dependencies
  rustPlatform,
  stdenv,
  tree-sitter,
  tree-sitter-c-sharp,
  tree-sitter-embedded-template,
  tree-sitter-yaml,
}:

buildPythonPackage rec {
let
  # Base URL of the GitHub release assets for a given upstream version
  # (the release tag is the version prefixed with "v").
  parserReleaseUrl =
    version: "https://github.com/kreuzberg-dev/tree-sitter-language-pack/releases/download/v${version}";

  # Per-system parser archive: `suffix` selects the release asset
  # (parsers-<suffix>.tar.zst) and `hash` pins its contents. The attribute
  # names also define meta.platforms.
  # NOTE: update.sh rewrites these hashes with multi-line regexes that match
  # each entry's exact layout (attribute name line, `suffix` line, `hash`
  # line), so keep the formatting inside each entry unchanged.
  parserBundleSpecs = {
    aarch64-darwin = {
      suffix = "macos-arm64";
      hash = "sha256-pYrgwhb3BkOEqot5JBi26aXBciGt7/zP/1+HcQT2vsw=";
    };
    aarch64-linux = {
      suffix = "linux-aarch64";
      hash = "sha256-t1rWm19iExYAZXluMQqlt9bOkEC2UumcxDov8YmYEEQ=";
    };
    x86_64-linux = {
      suffix = "linux-x86_64";
      hash = "sha256-o4IpLZDitTsHfF2KMnyB3Wry7Hig7Byxd0JLcZPybJ0=";
    };
  };
in
buildPythonPackage (finalAttrs: {
  pname = "tree-sitter-language-pack";
  version = "0.13.0";
  pyproject = true;
  version = "1.4.1";

  # Using the GitHub sources necessitates fetching the treesitter grammar parsers by using a vendored script.
  # The pypi archive has the benefit of already vendoring those dependencies which makes packaging easier on our side
  # See: https://github.com/Goldziher/tree-sitter-language-pack/blob/main/scripts/clone_vendors.py
  src = fetchPypi {
    pname = "tree_sitter_language_pack";
    inherit version;
    hash = "sha256-AyA0xeJ7H24AcwuefC28ggO0cA0MaB/QGdbe/PYRg+w=";
  src = fetchFromGitHub {
    owner = "kreuzberg-dev";
    repo = "tree-sitter-language-pack";
    tag = "v${finalAttrs.version}";
    hash = "sha256-kN2htitEOo+JF6DCrC4RHmHkZXnUA0fUo2jSbMELQHI=";
  };

  # Upstream bumped dependencies aggressively, but we can still use older
  # versions since the newer ones aren’t packaged in nixpkgs. We can't use
  # pythonRelaxDepsHook here because it runs in postBuild, while the dependency
  # check occurs during the build phase.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "typing-extensions>=4.15.0" "typing-extensions>=4.14.1"
  '';
  # Cargo dependencies fetched with fetchCargoVendor and pinned by `hash`.
  # NOTE: update.sh finds this hash by matching the literal text
  # `cargoDeps = rustPlatform.fetchCargoVendor {` followed by the first line of
  # the form `    hash = "...";`, so keep that shape and indentation intact.
  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit (finalAttrs)
      pname
      version
      src
      ;
    hash = "sha256-ii3rvAfs4xMSyEEDjUrjL2SAONd0ARCVhwQNCJLwuCk=";
  };

  nativeCheckInputs = [
    pytestCheckHook
  ];
  buildAndTestSubdir = "crates/ts-pack-python";

  build-system = [
    cython
    setuptools
    typing-extensions
  ];
  # Pin the release metadata and per-platform parser archive so runtime use stays offline.
  # NOTE: update.sh refreshes this hash with a regex that matches the
  # `parserManifest = fetchurl {` / `url = ...` / `hash = ...` lines verbatim,
  # including their indentation, so keep the layout of this attribute unchanged.
  parserManifest = fetchurl {
    url = "${parserReleaseUrl finalAttrs.version}/parsers.json";
    hash = "sha256-8utASonvrLzOjxZcmRuzuFSGtYe5sEoMU+xz++bfmkk=";
  };

  dependencies = [
    tree-sitter
    tree-sitter-c-sharp
    tree-sitter-embedded-template
    tree-sitter-yaml
  ];
  # Parser archive for the host platform. Evaluating this attribute on a system
  # without an entry in parserBundleSpecs aborts with an explanatory error.
  parserBundle =
    let
      inherit (stdenv.hostPlatform) system;
      unavailable = throw "tree-sitter-language-pack parser bundle is unavailable for ${system}";
      bundle = parserBundleSpecs.${system} or unavailable;
    in
    fetchurl {
      inherit (bundle) hash;
      url = "${parserReleaseUrl finalAttrs.version}/parsers-${bundle.suffix}.tar.zst";
    };

  pythonRelaxDeps = [
    "tree-sitter"
    "tree-sitter-embedded-template"
    "tree-sitter-yaml"
  nativeBuildInputs = [
    buildPackages.zstd
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
  ];

  pythonImportsCheck = [
    "tree_sitter_language_pack"
    "tree_sitter_language_pack.bindings"
  nativeCheckInputs = [ pytestCheckHook ];

  dependencies = [ tree-sitter ];

  disabledTests = [
    # tree-sitter-language-pack 1.4.1 upstream smoke tests expect these aliases
    # to resolve directly in the offline cache, but the packaged bundle still
    # exposes the underlying parser library names.
    "test_get_language_returns_non_none"
    "test_get_parser_for_previously_broken_languages"
    "test_has_language_for_previously_broken"
  ];

  # make sure import the built version, not the source one
  # Test setup: unpack the pinned parser bundle and manifest into a scratch
  # cache under $PWD, then write a conftest.py that points the package at that
  # cache and replaces its download helpers with no-ops.
  # The archive is extracted quietly (`tar -xf`) instead of producing a verbose
  # listing that was only redirected to /dev/null.
  preCheck = ''
    rm -r tree_sitter_language_pack
    # Mirror the upstream cache layout: libs live in cache_dir, while the manifest
    # is expected at cache_dir/../manifest.json.
    cacheRoot=$PWD/.tree-sitter-language-pack-cache
    cacheDir="$cacheRoot/libs"
    mkdir -p "$cacheDir"
    cp ${finalAttrs.parserManifest} "$cacheRoot/manifest.json"
    ${lib.getExe buildPackages.zstd} -d -c ${finalAttrs.parserBundle} | tar -xf - -C "$cacheDir"

    # Upstream smoke tests call download APIs even when the parsers are already
    # available locally, so point them at the pre-fetched cache and short-circuit
    # redundant network downloads during pytest.
    cat > conftest.py <<EOF
    import json
    from pathlib import Path

    import tree_sitter_language_pack as tslp

    _cache_dir = Path(r"$cacheDir")
    _manifest_path = _cache_dir.parent / "manifest.json"

    tslp.configure(cache_dir=str(_cache_dir))

    def _manifest_languages():
        return sorted(json.loads(_manifest_path.read_text())["languages"].keys())

    def _download(names):
        return 0

    def _download_all():
        return 0

    tslp.manifest_languages = _manifest_languages
    tslp.download = _download
    tslp.download_all = _download_all
    EOF
  '';

  passthru.updateScript = nix-update-script { };
  # Test locations handed to pytest by pytestCheckHook. `enabledTestPaths` is
  # the hook's attribute for positional test paths and replaces the deprecated
  # `pytestFlagsArray`.
  enabledTestPaths = [
    "e2e/python/tests"
    "tests/test_apps/python/smoke_test.py"
  ];

  # Ship the pinned parser manifest and bundle in $out/share and patch the
  # installed module so it calls configure() with that cache directory.
  # Changes from the previous version: the archive is extracted quietly
  # (`tar -xf`) rather than listing every member into /dev/null, and $cacheDir
  # is quoted where it is spliced into the replacement text.
  postInstall = ''
    cacheRoot=$out/share/tree-sitter-language-pack
    cacheDir="$cacheRoot/libs"
    mkdir -p "$cacheDir"
    cp ${finalAttrs.parserManifest} "$cacheRoot/manifest.json"
    ${lib.getExe buildPackages.zstd} -d -c ${finalAttrs.parserBundle} | tar -xf - -C "$cacheDir"

    # Make the installed package default to the pre-fetched cache in $out.
    substituteInPlace $out/${python.sitePackages}/tree_sitter_language_pack/__init__.py \
      --replace-fail 'SupportedLanguage: TypeAlias = str' $'configure(cache_dir="'"$cacheDir"$'")\n\nSupportedLanguage: TypeAlias = str'
  '';

  pythonImportsCheck = [ "tree_sitter_language_pack" ];

  passthru.updateScript = ./update.sh;

  meta = {
    description = "Comprehensive collection of tree-sitter languages";
    homepage = "https://github.com/Goldziher/tree-sitter-language-pack";
    changelog = "https://github.com/Goldziher/tree-sitter-language-pack/releases/tag/v${version}";
    description = "Comprehensive collection of tree-sitter language parsers with polyglot bindings";
    homepage = "https://github.com/kreuzberg-dev/tree-sitter-language-pack";
    changelog = "https://github.com/kreuzberg-dev/tree-sitter-language-pack/releases/tag/v${finalAttrs.version}";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ yzx9 ];
    platforms = builtins.attrNames parserBundleSpecs;
  };
}
})
+86 −0
Original line number Diff line number Diff line
#!/usr/bin/env nix-shell
#!nix-shell -I nixpkgs=./. -i bash -p bash curl jq gnused coreutils nix nix-update perl

# Update script for the tree-sitter-language-pack package.
# perl is listed in the nix-shell package set above because this script calls
# it for the multi-line hash edits; without it the script only works when the
# host happens to have perl on PATH.

set -euo pipefail

# Directory containing this script (physical path, symlinks resolved).
PACKAGE_DIR="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 && pwd -P)"
# Four levels above the package directory; used as the working directory for
# `nix build`.
NIXPKGS_ROOT="$(cd -- "$PACKAGE_DIR/../../../.." >/dev/null 2>&1 && pwd -P)"
# The Nix expression whose version and hashes are rewritten.
PACKAGE_FILE="$PACKAGE_DIR/default.nix"
# Attribute to update; UPDATE_NIX_ATTR_PATH overrides the default when set.
ATTR_PATH="${UPDATE_NIX_ATTR_PATH:-python3Packages.tree-sitter-language-pack}"

# Print the version of the release that the GitHub API reports as latest, with
# a leading "v" stripped from the tag name.
# Sends GITHUB_TOKEN as a bearer token when it is set and non-empty.
latest_version() {
  # --fail/--show-error make HTTP errors abort with curl's own message instead
  # of silently handing an error document to jq.
  curl --fail --silent --show-error --location \
    ${GITHUB_TOKEN:+ -H "Authorization: Bearer $GITHUB_TOKEN"} \
    "https://api.github.com/repos/kreuzberg-dev/tree-sitter-language-pack/releases/latest" \
    | jq -r '.tag_name | sub("^v"; "")'
}

# Replace every match of $1 (interpreted by sed as a regular expression) with
# $2 throughout $PACKAGE_FILE, editing the file in place.
replace_value() {
  local from="$1" to="$2"
  local script="s|$from|$to|g"
  sed -i -e "$script" "$PACKAGE_FILE"
}

replace_perl() {
  local pattern="$1"
  local replacement="$2"
  perl -0pi -e "s|$pattern|$replacement|s" "$PACKAGE_FILE"
}

# Download $1 with nix-prefetch-url and print its sha256 hash converted to SRI
# form by `nix hash convert`.
prefetch_sri() {
  local source_url="$1"
  local raw_hash
  # Assignment is kept separate from `local` so a failed download is not
  # masked by the exit status of `local`.
  raw_hash="$(nix-prefetch-url --type sha256 "$source_url")"
  nix hash convert --hash-algo sha256 --to sri "$raw_hash"
}

# Target version: the first command-line argument when given, otherwise
# whatever latest_version reports.
version="${1:-$(latest_version)}"

# Stop early when the requested version equals UPDATE_NIX_OLD_VERSION.
if [[ "${UPDATE_NIX_OLD_VERSION:-}" == "$version" ]]; then
  echo "$ATTR_PATH is already at $version"
  exit 0
fi

# Hand the version bump to nix-update; the remaining hashes are refreshed by
# the steps that follow.
nix-update "$ATTR_PATH" --version "$version"

release_url="https://github.com/kreuzberg-dev/tree-sitter-language-pack/releases/download/v$version"

# Nix system -> release archive suffix, processed in the listed order.
bundle_systems=(aarch64-darwin aarch64-linux x86_64-linux)
declare -A bundle_suffix=(
  [aarch64-darwin]=macos-arm64
  [aarch64-linux]=linux-aarch64
  [x86_64-linux]=linux-x86_64
)
declare -A bundle_hash

# Fetch every hash first, so the package file is only edited once all
# downloads have succeeded.
manifest_hash="$(prefetch_sri "$release_url/parsers.json")"
for system in "${bundle_systems[@]}"; do
  bundle_hash[$system]="$(prefetch_sri "$release_url/parsers-${bundle_suffix[$system]}.tar.zst")"
done

# Rewrite the manifest hash, then each per-system bundle hash. The patterns
# mirror the exact multi-line layout of the corresponding entries in the
# package file.
replace_perl '(parserManifest = fetchurl \{\n    url = "\$\{parserReleaseUrl finalAttrs\.version\}/parsers\.json";\n    hash = ")[^"]*(";)' "\${1}$manifest_hash\${2}"
for system in "${bundle_systems[@]}"; do
  printf -v entry_pattern '(%s = \\{\\n      suffix = "%s";\\n      hash = ")[^"]*(";)' \
    "$system" "${bundle_suffix[$system]}"
  replace_perl "$entry_pattern" "\${1}${bundle_hash[$system]}\${2}"
done

# Refresh the cargoDeps hash: swap the current hash for a placeholder, let the
# build fail on the mismatch, then read the real hash from the "got:" line of
# the build output.
old_cargo_hash="$(
  perl -0ne 'print "$1\n" if /cargoDeps = rustPlatform\.fetchCargoVendor \{\n(?:.*\n)*?    hash = "([^"]+)";/s' "$PACKAGE_FILE"
)"

# An empty pattern would make sed fail with an unrelated-looking message, so
# stop here with one that names the actual problem.
if [[ -z "$old_cargo_hash" ]]; then
  echo "failed to locate the cargoDeps hash in $PACKAGE_FILE" >&2
  exit 1
fi

fake_hash='sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='
replace_value "$old_cargo_hash" "$fake_hash"

# The build is expected to fail, so errexit is suspended while its stderr is
# captured (stdout is discarded).
set +e
build_output="$(
  cd "$NIXPKGS_ROOT" &&
    nix build ".#$ATTR_PATH" 2>&1 >/dev/null
)"
build_status=$?
set -e

if [[ $build_status -eq 0 ]]; then
  # Do not leave the placeholder hash behind in the package file.
  replace_value "$fake_hash" "$old_cargo_hash"
  echo "expected cargo hash mismatch build to fail, but it succeeded" >&2
  exit 1
fi

new_cargo_hash="$(printf '%s\n' "$build_output" | sed -n 's/.*got:[[:space:]]*\(sha256-[A-Za-z0-9+/=]*\).*/\1/p' | head -n1)"

if [[ -z "$new_cargo_hash" ]]; then
  # Do not leave the placeholder hash behind in the package file.
  replace_value "$fake_hash" "$old_cargo_hash"
  printf '%s\n' "$build_output" >&2
  echo "failed to extract cargo hash from build output" >&2
  exit 1
fi

replace_value "$fake_hash" "$new_cargo_hash"

echo "updated $ATTR_PATH to $version"