Unverified Commit 1fa09357 authored by Matthieu Coudron's avatar Matthieu Coudron Committed by GitHub
Browse files

helix: build tree-sitter grammars from source (#452275)

parents d903daa5 95dd21fe
Loading
Loading
Loading
Loading
+1 −5
Original line number Diff line number Diff line
@@ -25,15 +25,11 @@ rustPlatform.buildRustPackage (finalAttrs: {
  env = {
    # disable fetching and building of tree-sitter grammars in the helix-term build.rs
    HELIX_DISABLE_AUTO_GRAMMAR_BUILD = "1";
    HELIX_DEFAULT_RUNTIME = "${placeholder "out"}/lib/runtime";
    HELIX_DEFAULT_RUNTIME = helix.runtime;
  };

  postInstall = ''
    mkdir -p $out/lib
    cp -r runtime $out/lib
    # copy tree-sitter grammars from helix package
    # TODO: build it from source instead
    cp -r ${helix}/lib/runtime/grammars/* $out/lib/runtime/grammars/
    installShellCompletion contrib/completion/hx.{bash,fish,zsh}
    mkdir -p $out/share/{applications,icons/hicolor/256x256/apps}
    cp contrib/Helix.desktop $out/share/applications
+155 −0
Original line number Diff line number Diff line
#!/usr/bin/env nix-shell
#! nix-shell -i python3 -p python3 nurl
"""
Generate grammar information for Helix editor by parsing languages.toml
and fetching source information using nurl in parallel.
"""

import argparse
import asyncio
import json
import os
import sys
import tomllib
from dataclasses import dataclass
from pathlib import Path
from typing import Any


@dataclass
class Grammar:
    """A single tree-sitter grammar entry read from languages.toml."""

    # Grammar name; parse_languages_toml stores it with "_" replaced by "-".
    name: str
    # Value of the entry's `source.git` key; passed to nurl as the URL.
    git_url: str
    # Value of the entry's `source.rev` key; passed to nurl as the revision.
    rev: str
    # Value of the entry's optional `source.subpath` key; None when absent.
    subpath: str | None = None


async def run_nurl(url: str, rev: str, semaphore: asyncio.Semaphore) -> dict[str, Any]:
    """Invoke ``nurl <url> <rev> --json`` and return its decoded JSON output.

    The semaphore is held for the whole lifetime of the subprocess, so it
    bounds how many nurl processes exist at the same time.

    Raises:
        RuntimeError: if nurl exits with a non-zero status; the message
            carries nurl's stderr output.
    """
    command = ("nurl", url, rev, "--json")
    pipe = asyncio.subprocess.PIPE

    async with semaphore:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=pipe,
            stderr=pipe,
        )
        raw_out, raw_err = await process.communicate()

        if process.returncode != 0:
            raise RuntimeError(f"nurl failed for {url}@{rev}: {raw_err.decode()}")

        return json.loads(raw_out.decode())


def parse_languages_toml(toml_path: Path) -> list[Grammar]:
    """Read languages.toml and return one Grammar per git-sourced entry.

    Entries of the top-level ``grammar`` array are skipped when they have no
    ``source`` table, or when that table lacks either ``git`` or ``rev``.
    Underscores in grammar names are turned into hyphens.
    """
    with open(toml_path, "rb") as handle:
        entries = tomllib.load(handle).get("grammar", [])

    # Lazily pair each entry with its source table, dropping source-less ones.
    sourced = ((entry, entry["source"]) for entry in entries if "source" in entry)

    return [
        Grammar(
            name=entry["name"].replace("_", "-"),
            git_url=origin["git"],
            rev=origin["rev"],
            subpath=origin.get("subpath"),
        )
        for entry, origin in sourced
        if "git" in origin and "rev" in origin
    ]


async def fetch_all_grammars(
    grammars: list[Grammar], max_parallel: int
) -> dict[str, Any]:
    """Fetch nurl information for all grammars concurrently.

    Args:
        grammars: grammars to resolve; each one is passed to ``run_nurl``.
        max_parallel: upper bound on simultaneously running nurl processes.

    Returns:
        A dict keyed by grammar name, in the order of ``grammars``. Each value
        is ``{"nurl": ..., "subpath": ...}`` on success or ``{"error": ...}``
        when fetching that grammar failed.

    Raises:
        ValueError: if ``max_parallel`` is smaller than 1 (a zero-valued
            semaphore would block every fetch forever).
    """
    if max_parallel < 1:
        raise ValueError(f"max_parallel must be at least 1, got {max_parallel}")

    semaphore = asyncio.Semaphore(max_parallel)
    total = len(grammars)

    # Wrap every coroutine in a Task so it is scheduled immediately. Awaiting
    # bare coroutines one after another would start each nurl only once the
    # previous one had finished, i.e. run everything serially.
    tasks = [
        asyncio.create_task(run_nurl(grammar.git_url, grammar.rev, semaphore))
        for grammar in grammars
    ]

    # Collect in input order so the resulting dict (and the JSON written from
    # it) is deterministic regardless of which fetch finishes first.
    results: dict[str, Any] = {}
    for completed, (grammar, task) in enumerate(zip(grammars, tasks), start=1):
        try:
            nurl_info = await task
        except Exception as e:
            # Best effort: record the failure and keep going so the caller
            # can report every broken grammar at once.
            print(f"[{completed}/{total}] ✗ {grammar.name}: {e}", file=sys.stderr)
            results[grammar.name] = {"error": str(e)}
        else:
            results[grammar.name] = {
                "nurl": nurl_info,
                "subpath": grammar.subpath,
            }
            print(f"[{completed}/{total}] ✓ {grammar.name}", file=sys.stderr)

    return results


def _positive_int(value: str) -> int:
    """Argparse type: parse *value* as an integer that is at least 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be at least 1, got {number}")
    return number


async def main():
    """Command-line entry point.

    Parses languages.toml, resolves every grammar through nurl and writes the
    collected information as JSON to the output file. Exits with status 1 if
    the input file is missing or any grammar failed to resolve; in the latter
    case no output file is written.
    """
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to a single job so the semaphore always gets a valid value.
    default_jobs = os.cpu_count() or 1

    parser = argparse.ArgumentParser(
        description="Generate grammar information for Helix editor"
    )
    parser.add_argument(
        "languages_toml",
        type=Path,
        help="path to languages.toml from Helix repository",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=Path("grammars.json"),
        help="output JSON file (default: grammars.json)",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        # Reject 0 and negative values up front: a semaphore of 0 would hang.
        type=_positive_int,
        default=default_jobs,
        help=f"number of parallel nurl instances (default: {default_jobs})",
    )

    args = parser.parse_args()

    if not args.languages_toml.exists():
        print(f"Error: {args.languages_toml} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Parsing {args.languages_toml}...", file=sys.stderr)
    grammars = parse_languages_toml(args.languages_toml)
    print(f"Found {len(grammars)} grammars", file=sys.stderr)

    print(f"Fetching grammar information ({args.jobs} parallel jobs)...", file=sys.stderr)
    results = await fetch_all_grammars(grammars, args.jobs)

    errors = [name for name, data in results.items() if "error" in data]
    if errors:
        print(f"\nFailed grammars ({len(errors)}):", file=sys.stderr)
        for name in errors:
            print(f"  - {name}: {results[name]['error']}", file=sys.stderr)
        sys.exit(1)

    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
        # Terminate the file with a newline, which json.dump does not emit.
        f.write('\n')

    print(f"\nResults written to {args.output}", file=sys.stderr)


# Script entry point: drive the async main() to completion with asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())
+2967 −0

File added.

Preview size limit exceeded, changes collapsed.

+145 −79
Original line number Diff line number Diff line
{
  fetchzip,
  fetchpatch,
  fetchFromGitHub,
  lib,
  rustPlatform,
  mdbook,
  git,
  gitMinimal,
  installShellFiles,
  versionCheckHook,
  nix-update-script,
  runCommand,
  removeReferencesTo,
  pkgs,
  tree-sitter,
  lockedGrammars ? lib.importJSON ./grammars.json,
  grammarsOverlay ? (
    final: prev: {
      tree-sitter-sql = prev.tree-sitter-sql.override {
        generate = false;
      };
      tree-sitter-qmljs = prev.tree-sitter-qmljs.overrideAttrs {
        dontCheckForBrokenSymlinks = true;
      };
    }
  ),
}:

rustPlatform.buildRustPackage (finalAttrs: {
rustPlatform.buildRustPackage (
  finalAttrs:
  let
    lockedVersionsOverlay =
      final: prev:
      lib.mapAttrs (
        drvName: grammar:
        let
          lockedGrammar = lockedGrammars.${lib.removePrefix "tree-sitter-" drvName};
        in
        (prev.${drvName}.override {
          location = lockedGrammar.subpath;
        }).overrideAttrs
          {
            version = lib.sources.shortRev lockedGrammar.nurl.args.rev;
            src = (pkgs.${lockedGrammar.nurl.fetcher} lockedGrammar.nurl.args);
          }
      ) prev;

    tree-sitter-grammars =
      lib.filterAttrs (drvName: _: lib.hasAttr (lib.removePrefix "tree-sitter-" drvName) lockedGrammars)
        (
          tree-sitter.grammarsScope.overrideScope (
            lib.composeExtensions lockedVersionsOverlay grammarsOverlay
          )
        );

    # Dynamic libraries for the grammars always use the `.so` extension, also on Darwin (should use `.dylib`)
    # See here: https://github.com/helix-editor/helix/pull/14982
    # Switch to `stdenv.hostPlatform.extensions.sharedLibrary` once the fix above reaches the next release

    grammarsFarm = runCommand "helix-grammars" { } (
      lib.concatMapAttrsStringSep "\n" (_: grammar: ''
        install -D ${grammar}/parser $out/${grammar.language}.so
        ${lib.getExe removeReferencesTo} -t ${grammar} $out/${grammar.language}.so
      '') (lib.filterAttrs (_: lib.isDerivation) tree-sitter-grammars)
    );

    lockedGrammarsCount = lib.length (lib.attrNames lockedGrammars);

    runtimeDir = runCommand "helix-runtime" { } ''
      cp -r --no-preserve=mode ${finalAttrs.src}/runtime $out
      rm -r $out/grammars
      ln -s ${grammarsFarm} $out/grammars
      count=$(ls -1 "$out/grammars/" | wc -l)
      if [ "$count" -ne ${toString lockedGrammarsCount} ]; then
        echo "Expected ${toString lockedGrammarsCount} grammars, found $count"
        exit 1
      fi
    '';
  in
  {
    pname = "helix";
    version = "25.07.1";
    outputs = [
@@ -18,12 +82,11 @@ rustPlatform.buildRustPackage (finalAttrs: {
      "doc"
    ];

  # This release tarball includes source code for the tree-sitter grammars,
  # which is not ordinarily part of the repository.
  src = fetchzip {
    url = "https://github.com/helix-editor/helix/releases/download/${finalAttrs.version}/helix-${finalAttrs.version}-source.tar.xz";
    hash = "sha256-Pj/lfcQXRWqBOTTWt6+Gk61F9F1UmeCYr+26hGdG974=";
    stripRoot = false;
    src = fetchFromGitHub {
      owner = "helix-editor";
      repo = "helix";
      tag = "${finalAttrs.version}";
      hash = "sha256-RFSzGAcB0mMg/02ykYfTWXzQjLFu2CJ4BkS5HZ/6pBo=";
    };

    patches = [
@@ -40,27 +103,26 @@ rustPlatform.buildRustPackage (finalAttrs: {
    cargoHash = "sha256-Mf0nrgMk1MlZkSyUN6mlM5lmTcrOHn3xBNzmVGtApEU=";

    nativeBuildInputs = [
    git
      gitMinimal
      installShellFiles
      mdbook
    ];

  env.HELIX_DEFAULT_RUNTIME = "${placeholder "out"}/lib/runtime";
    env = {
      HELIX_DEFAULT_RUNTIME = runtimeDir;
      HELIX_DISABLE_AUTO_GRAMMAR_BUILD = "1";
    };

    postBuild = ''
      mdbook build book -d ../book-html
    '';

    postInstall = ''
    # not needed at runtime
    rm -r runtime/grammars/sources

      mkdir -p $out/lib $doc/share/doc
    cp -r runtime $out/lib
      installShellCompletion contrib/completion/hx.{bash,fish,zsh}
      mkdir -p $out/share/{applications,icons/hicolor/256x256/apps}
    cp contrib/Helix.desktop $out/share/applications
    cp contrib/helix.png $out/share/icons/hicolor/256x256/apps
      cp contrib/Helix.desktop $out/share/applications/Helix.desktop
      cp contrib/helix.png $out/share/icons/hicolor/256x256/apps/helix.png
      cp -r ../book-html $doc/share/doc/$name
    '';

@@ -71,7 +133,9 @@ rustPlatform.buildRustPackage (finalAttrs: {
    doInstallCheck = true;

    passthru = {
    updateScript = nix-update-script { };
      updateScript = ./update.sh;
      runtime = runtimeDir;
      inherit tree-sitter-grammars;
    };

    meta = {
@@ -81,8 +145,10 @@ rustPlatform.buildRustPackage (finalAttrs: {
      license = lib.licenses.mpl20;
      mainProgram = "hx";
      maintainers = with lib.maintainers; [
        aciceri
        danth
        yusdacra
      ];
    };
})
  }
)
+24 −0
Original line number Diff line number Diff line
#!/usr/bin/env nix-shell
#!nix-shell -i bash -p nurl nix-update python3

# Update script for the helix package: bump the helix source with nix-update,
# then regenerate grammars.json from the languages.toml of the new source.

set -euo pipefail

# Directory containing this script; generate_grammars.py and grammars.json
# are resolved relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "Updating helix source to the latest stable..."
nix-update helix

echo "Fetching updated helixSource..."
HELIX_SRC=$(nix-instantiate --eval -A "helix.src.outPath" --raw)

echo "Generating grammars.json..."
# Test the command directly: with `set -e` active, a separate `$?` check on
# the following line is never reached on failure because the shell has
# already exited, so the error message would never be printed.
if ! "$SCRIPT_DIR/generate_grammars.py" \
  "$HELIX_SRC/languages.toml" \
  -o "$SCRIPT_DIR/grammars.json"; then
  echo "Error: Failed to generate grammars.json" >&2
  exit 1
fi

echo "Done! Updated grammars.json"
Loading