Unverified Commit 80b9b2bc authored by Gaétan Lepage's avatar Gaétan Lepage Committed by GitHub
Browse files

python3Packages.chromadb: fixes (#412528)

parents b067dbf0 c112d761
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ python3Packages.buildPythonApplication rec {
  dependencies =
    with python3Packages;
    [
      chromadb
      chromadb_0
      colorlog
      httpx
      json5
+220 −0
Original line number Diff line number Diff line
# Package expression for the 0.x series of chromadb (pinned to 0.6.3).
#
# Builds the upstream `chroma-core/chroma` sources with setuptools and runs a
# reduced subset of the upstream test suite (see `disabledTests` and
# `disabledTestPaths` below for what is skipped and why).
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools-scm,
  setuptools,

  # build inputs
  cargo,
  pkg-config,
  protobuf,
  rustc,
  rustPlatform,
  pkgs, # top-level package set; needed because zstd is hidden by python3Packages.zstd
  openssl,

  # dependencies
  bcrypt,
  build,
  fastapi,
  grpcio,
  httpx,
  importlib-resources,
  kubernetes,
  mmh3,
  numpy,
  onnxruntime,
  opentelemetry-api,
  opentelemetry-exporter-otlp-proto-grpc,
  opentelemetry-instrumentation-fastapi,
  opentelemetry-sdk,
  orjson,
  overrides,
  posthog,
  pulsar-client,
  pydantic,
  pypika,
  pyyaml,
  requests,
  tenacity,
  tokenizers,
  tqdm,
  typer,
  typing-extensions,
  uvicorn,
  # No longer referenced in the body (buildInputs uses pkgs.zstd instead);
  # retained so that existing `.override { zstd = ...; }` calls keep evaluating.
  zstd,

  # optional dependencies
  chroma-hnswlib,

  # tests
  hypothesis,
  psutil,
  pytest-asyncio,
  pytestCheckHook,

  # passthru
  nixosTests,
  # Not referenced in the body: automatic updates are disabled for this
  # package (see the nixpkgs-update marker near `meta`).
  nix-update-script,
}:

buildPythonPackage rec {
  pname = "chromadb_0";
  version = "0.6.3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "chroma-core";
    repo = "chroma";
    tag = version;
    hash = "sha256-yvAX8buETsdPvMQmRK5+WFz4fVaGIdNlfhSadtHwU5U=";
  };

  # Vendored Rust crates for the same source tree; this hash must be updated
  # together with `src.hash`.
  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit src;
    name = "${pname}-${version}";
    hash = "sha256-lHRBXJa/OFNf4x7afEJw9XcuDveTBIy3XpQ3+19JXn4=";
  };

  # Drop upstream's version constraints on these dependencies.
  pythonRelaxDeps = [
    "chroma-hnswlib"
    "orjson"
    "tokenizers"
  ];

  build-system = [
    setuptools
    setuptools-scm
  ];

  nativeBuildInputs = [
    cargo
    pkg-config
    protobuf
    rustc
    rustPlatform.cargoSetupHook
  ];

  buildInputs = [
    openssl
    # The C library, not the Python binding: inside the Python package set the
    # bare name `zstd` refers to python3Packages.zstd.
    pkgs.zstd
  ];

  dependencies = [
    bcrypt
    build
    chroma-hnswlib
    fastapi
    grpcio
    httpx
    importlib-resources
    kubernetes
    mmh3
    numpy
    onnxruntime
    opentelemetry-api
    opentelemetry-exporter-otlp-proto-grpc
    opentelemetry-instrumentation-fastapi
    opentelemetry-sdk
    orjson
    overrides
    posthog
    pulsar-client
    pydantic
    pypika
    pyyaml
    requests
    tenacity
    tokenizers
    tqdm
    typer
    typing-extensions
    uvicorn
  ];

  nativeCheckInputs = [
    hypothesis
    psutil
    pytest-asyncio
    pytestCheckHook
  ];

  # Disable on aarch64-linux due to broken onnxruntime
  # https://github.com/microsoft/onnxruntime/issues/10038
  pythonImportsCheck = lib.optionals (stdenv.hostPlatform.system != "aarch64-linux") [ "chromadb" ];

  # Test collection breaks on aarch64-linux
  doCheck = stdenv.hostPlatform.system != "aarch64-linux";

  env = {
    # Have the zstd-sys crate locate libzstd through pkg-config (provided by
    # `pkgs.zstd` in buildInputs).
    ZSTD_SYS_USE_PKG_CONFIG = true;
  };

  pytestFlagsArray = [
    "-x" # these are slow tests, so stop on the first failure
    "-v"
  ];

  preCheck = ''
    # Raise the open-file limit to 1024 when the current limit is lower.
    (($(ulimit -n) < 1024)) && ulimit -n 1024
    # Run the tests with a fresh temporary directory as HOME.
    export HOME=$(mktemp -d)
  '';

  disabledTests = [
    # Tests are flaky / timing sensitive
    "test_fastapi_server_token_authn_allows_when_it_should_allow"
    "test_fastapi_server_token_authn_rejects_when_it_should_reject"

    # Issue with event loop
    "test_http_client_bw_compatibility"

    # httpx ReadError
    "test_not_existing_collection_delete"
  ];

  disabledTestPaths = [
    # Tests require network access
    "chromadb/test/auth/test_simple_rbac_authz.py"
    "chromadb/test/db/test_system.py"
    "chromadb/test/ef/test_default_ef.py"
    # The whole directory is skipped, which includes
    # chromadb/test/property/test_cross_version_persist.py
    "chromadb/test/property/"
    "chromadb/test/stress/"
    "chromadb/test/test_api.py"

    # httpx failures
    "chromadb/test/api/test_delete_database.py"

    # Cannot be loaded by pytest without path hacks (fixed in 1.0.0)
    "chromadb/test/test_logservice.py"
    "chromadb/test/proto/test_utils.py"
    "chromadb/test/segment/distributed/test_protobuf_translation.py"

    # Hypothesis FailedHealthCheck due to nested @given tests
    "chromadb/test/cache/test_cache.py"
  ];

  __darwinAllowLocalNetworking = true;

  passthru.tests = {
    inherit (nixosTests) chromadb;
  };

  # nixpkgs-update: no auto update
  # 0.6.3 is the last of the 0.x series

  meta = {
    description = "AI-native open-source embedding database";
    homepage = "https://github.com/chroma-core/chroma";
    changelog = "https://github.com/chroma-core/chroma/releases/tag/${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [
      fab
      sarahec
    ];
    mainProgram = "chroma";
  };
}
+135 −42
Original line number Diff line number Diff line
{
  lib,
  stdenv,
  bcrypt,
  build,
  buildPythonPackage,
  fetchFromGitHub,
  fetchurl,

  # build inputs
  cargo,
  chroma-hnswlib,
  pkg-config,
  protobuf,
  rustc,
  rustPlatform,
  pkgs, # zstd hidden by python3Packages.zstd
  openssl,

  # dependencies
  bcrypt,
  build,
  fastapi,
  fetchFromGitHub,
  grpcio,
  httpx,
  hypothesis,
  importlib-resources,
  jsonschema,
  kubernetes,
  mmh3,
  nixosTests,
  numpy,
  onnxruntime,
  openssl,
  opentelemetry-api,
  opentelemetry-exporter-otlp-proto-grpc,
  opentelemetry-instrumentation-fastapi,
  opentelemetry-sdk,
  orjson,
  overrides,
  pkg-config,
  posthog,
  protobuf,
  psutil,
  pulsar-client,
  pydantic,
  pypika,
  pytest-asyncio,
  pytestCheckHook,
  pythonOlder,
  pyyaml,
  requests,
  rustc,
  rustPlatform,
  setuptools-scm,
  setuptools,
  tenacity,
  tokenizers,
  tqdm,
  typer,
  typing-extensions,
  uvicorn,
  zstd,

  # optional dependencies
  chroma-hnswlib,

  # tests
  hnswlib,
  hypothesis,
  pandas,
  psutil,
  pytest-asyncio,
  pytest-xdist,
  pytestCheckHook,
  sqlite,
  starlette,

  # passthru
  nixosTests,
  nix-update-script,
}:

buildPythonPackage rec {
  pname = "chromadb";
  version = "0.5.20";
  version = "1.0.12";
  pyproject = true;

  disabled = pythonOlder "3.9";

  src = fetchFromGitHub {
    owner = "chroma-core";
    repo = "chroma";
    tag = version;
    hash = "sha256-DQHkgCHtrn9xi7Kp7TZ5NP1EtFtTH5QOvne9PUvxsWc=";
    hash = "sha256-Q4PhJTRNzJeVx6DIPWirnI9KksNb8vfOtqb/q9tSK3c=";
  };

  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit pname version src;
    hash = "sha256-ZtCTg8qNCiqlH7RsZxaWUNAoazdgmXP2GtpjDpRdvbk=";
    inherit src;
    name = "${pname}-${version}-vendor";
    hash = "sha256-+Ea2aRrsBGfVCLdOF41jeMehJhMurc8d0UKrpR6ndag=";
  };

  # Can't use fetchFromGitHub as the build expects a zipfile
  swagger-ui = fetchurl {
    url = "https://github.com/swagger-api/swagger-ui/archive/refs/tags/v5.22.0.zip";
    hash = "sha256-H+kXxA/6rKzYA19v7Zlx2HbIg/DGicD5FDIs0noVGSk=";
  };

  patches = [
    # The fastapi servers can't set up their networking in the test environment, so disable for testing
    ./disable-fastapi-fixtures.patch
  ];

  postPatch = ''
    # Nixpkgs is taking the version from `chromadb_rust_bindings` which is versioned independently
    substituteInPlace pyproject.toml \
      --replace-fail "dynamic = [\"version\"]" "version = \"${version}\""
  '';

  pythonRelaxDeps = [
    "chroma-hnswlib"
    "orjson"
    "fastapi"
  ];

  build-system = [
    setuptools
    setuptools-scm
    rustPlatform.maturinBuildHook
  ];

  nativeBuildInputs = [
@@ -88,17 +117,17 @@ buildPythonPackage rec {

  buildInputs = [
    openssl
    zstd
    pkgs.zstd
  ];

  dependencies = [
    bcrypt
    build
    chroma-hnswlib
    fastapi
    grpcio
    httpx
    importlib-resources
    jsonschema
    kubernetes
    mmh3
    numpy
@@ -123,20 +152,43 @@ buildPythonPackage rec {
    uvicorn
  ];

  optional-dependencies = {
    dev = [ chroma-hnswlib ];
  };

  nativeCheckInputs = [
    chroma-hnswlib
    hnswlib
    hypothesis
    pandas
    psutil
    pytest-asyncio
    pytest-xdist
    pytestCheckHook
    sqlite
    starlette
  ];

  pythonImportsCheck = [ "chromadb" ];
  # Disable on aarch64-linux due to broken onnxruntime
  # https://github.com/microsoft/onnxruntime/issues/10038
  pythonImportsCheck = lib.optionals (stdenv.hostPlatform.system != "aarch64-linux") [ "chromadb" ];

  # Test collection breaks on aarch64-linux
  doCheck = stdenv.hostPlatform.system != "aarch64-linux";

  env = {
    ZSTD_SYS_USE_PKG_CONFIG = true;
    SWAGGER_UI_DOWNLOAD_URL = "file://${swagger-ui}";
  };

  pytestFlagsArray = [ "-x" ];
  pytestFlagsArray = [
    "-x" # these are slow tests, so stop on the first failure
    "-v"
    "-W"
    "ignore:DeprecationWarning"
    "-W"
    "ignore:PytestCollectionWarning"
  ];

  preCheck = ''
    (($(ulimit -n) < 1024)) && ulimit -n 1024
@@ -144,39 +196,80 @@ buildPythonPackage rec {
  '';

  disabledTests = [
    # Tests are laky / timing sensitive
    # Tests are flaky / timing sensitive
    "test_fastapi_server_token_authn_allows_when_it_should_allow"
    "test_fastapi_server_token_authn_rejects_when_it_should_reject"

    # Issue with event loop
    "test_http_client_bw_compatibility"
    # Issue with httpx

    # httpx ReadError
    "test_not_existing_collection_delete"

    # Tests launch a server and try to connect to it
    # These either have https connection errors or name resolution errors
    "test_collection_query_with_invalid_collection_throws"
    "test_collection_update_with_invalid_collection_throws"
    "test_default_embedding"
    "test_invalid_index_params"
    "test_peek"
    "test_persist_index_loading"
    "test_query_id_filtering_e2e"
    "test_query_id_filtering_medium_dataset"
    "test_query_id_filtering_small_dataset"
    "test_ssl_self_signed_without_ssl_verify"
    "test_ssl_self_signed"

    # Apparent race condition with sqlite
    # See https://github.com/chroma-core/chroma/issues/4661
    "test_multithreaded_get_or_create"
  ];

  disabledTestPaths = [
    # Tests require network access
    "chromadb/test/auth/test_simple_rbac_authz.py"
    "chromadb/test/db/test_system.py"
    "bin/rust_python_compat_test.py"
    "chromadb/test/configurations/test_collection_configuration.py"
    "chromadb/test/ef/test_default_ef.py"
    "chromadb/test/ef/test_onnx_mini_lm_l6_v2.py"
    "chromadb/test/ef/test_voyageai_ef.py"
    "chromadb/test/property/"
    "chromadb/test/property/test_cross_version_persist.py"
    "chromadb/test/stress/"
    "chromadb/test/test_api.py"

    # Tests time out (waiting for server)
    "chromadb/test/test_cli.py"

    # Cannot find protobuf file while loading test
    "chromadb/test/distributed/test_log_failover.py"
  ];

  __darwinAllowLocalNetworking = true;

  passthru.tests = {
  passthru = {
    tests = {
      inherit (nixosTests) chromadb;
    };

  meta = with lib; {
    updateScript = nix-update-script {
      # we have to update both the python hash and the cargo one,
      # so use nix-update-script
      extraArgs = [
        "--versionRegex"
        "([0-9].+)"
      ];
    };
  };

  meta = {
    description = "AI-native open-source embedding database";
    homepage = "https://github.com/chroma-core/chroma";
    changelog = "https://github.com/chroma-core/chroma/releases/tag/${version}";
    license = licenses.asl20;
    maintainers = with maintainers; [ fab ];
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [
      fab
      sarahec
    ];
    mainProgram = "chroma";
    broken = stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64;
  };
}
+14 −0
Original line number Diff line number Diff line
diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py
index efde1c382..163f55c57 100644
--- a/chromadb/test/conftest.py
+++ b/chromadb/test/conftest.py
@@ -678,9 +678,6 @@ def sqlite_persistent(request: pytest.FixtureRequest) -> Generator[System, None,
 
 def system_fixtures() -> List[Callable[[], Generator[System, None, None]]]:
     fixtures = [
-        fastapi,
-        async_fastapi,
-        fastapi_persistent,
         sqlite_fixture,
         sqlite_persistent_fixture,
     ]
+2 −2
Original line number Diff line number Diff line
@@ -14,14 +14,14 @@

buildPythonPackage rec {
  pname = "langchain-chroma";
  version = "0.2.3";
  version = "0.2.4";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "langchain-ai";
    repo = "langchain";
    tag = "langchain-chroma==${version}";
    hash = "sha256-6WOViBKXZ844g2M6pYohHsXnzJiWbTNgj9EjN+z+B+4=";
    hash = "sha256-w4xvPPLYkPiQA34bimVHLe+vghMI9Pq36CHoE/EMnr8=";
  };

  sourceRoot = "${src.name}/libs/partners/chroma";
Loading