Unverified Commit fba30902 authored by Vladimír Čunát's avatar Vladimír Čunát
Browse files

python312Packages.mlflow: 2.14.3 -> 2.16.2 (#347104)

parents 8528157f 8e03a09b
Loading
Loading
Loading
Loading
+85 −0
Original line number Diff line number Diff line
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  google-auth,
  requests,

  # tests
  pyfakefs,
  pytestCheckHook,
  pytest-mock,
  requests-mock,
}:

buildPythonPackage rec {
  pname = "databricks-sdk";
  version = "0.34.0";
  pyproject = true;

  # Built from the GitHub tag rather than the sdist so the test suite is available.
  src = fetchFromGitHub {
    owner = "databricks";
    repo = "databricks-sdk-py";
    rev = "refs/tags/v${version}";
    hash = "sha256-pbOm1aTHtIAwk/TJ5CCT9/CqSTuHTWkRgJuflObkU54=";
  };

  build-system = [ setuptools ];

  dependencies = [
    google-auth
    requests
  ];

  pythonImportsCheck = [ "databricks.sdk" ];

  nativeCheckInputs = [
    pyfakefs
    pytestCheckHook
    pytest-mock
    requests-mock
  ];

  disabledTests =
    [
      # These tests need network access and fail in the sandbox with:
      # ValueError: default auth: cannot configure default credentials, please check...
      "test_azure_cli_does_not_specify_tenant_id_with_msi"
      "test_azure_cli_fallback"
      "test_azure_cli_user_no_management_access"
      "test_azure_cli_user_with_management_access"
      "test_azure_cli_with_warning_on_stderr"
      "test_azure_cli_workspace_header_present"
      "test_config_azure_cli_host"
      "test_config_azure_cli_host_and_resource_id"
      "test_config_azure_cli_host_and_resource_i_d_configuration_precedence"
      "test_load_azure_tenant_id_404"
      "test_load_azure_tenant_id_happy_path"
      "test_load_azure_tenant_id_no_location_header"
      "test_load_azure_tenant_id_unparsable_location_header"
    ]
    ++ lib.optionals (with stdenv.hostPlatform; isDarwin && isx86_64) [
      # Flaky on x86_64-darwin:
      # requests.exceptions.ChunkedEncodingError: ("Connection broken: ConnectionResetError(54, 'Connection reset by peer')", ConnectionResetError(54, 'Connection reset by peer'))
      "test_github_oidc_flow_works_with_azure"
    ];

  # Some tests talk to a server on localhost; allow that inside the darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "Databricks SDK for Python";
    homepage = "https://github.com/databricks/databricks-sdk-py";
    changelog = "https://github.com/databricks/databricks-sdk-py/blob/${src.rev}/CHANGELOG.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ GaetanLepage ];
  };
}
+115 −43
Original line number Diff line number Diff line
{
  lib,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  alembic,
  buildPythonPackage,
  cachetools,
  click,
  cloudpickle,
  databricks-cli,
  databricks-sdk,
  docker,
  entrypoints,
  fetchPypi,
  flask,
  gitpython,
  gorilla,
  graphene,
  gunicorn,
  importlib-metadata,
  jinja2,
  markdown,
  matplotlib,
  numpy,
@@ -22,60 +26,77 @@
  opentelemetry-sdk,
  packaging,
  pandas,
  prometheus-flask-exporter,
  protobuf,
  python-dateutil,
  pythonOlder,
  pyarrow,
  pytz,
  pyyaml,
  querystring-parser,
  requests,
  setuptools,
  scikit-learn,
  scipy,
  simplejson,
  sqlalchemy,
  sqlparse,

  # tests
  aiohttp,
  azure-core,
  azure-storage-blob,
  azure-storage-file,
  boto3,
  botocore,
  catboost,
  datasets,
  fastapi,
  google-cloud-storage,
  httpx,
  jwt,
  keras,
  langchain,
  librosa,
  moto,
  opentelemetry-exporter-otlp,
  optuna,
  pydantic,
  pyspark,
  pytestCheckHook,
  pytorch-lightning,
  sentence-transformers,
  starlette,
  statsmodels,
  tensorflow,
  torch,
  transformers,
  uvicorn,
  xgboost,
}:

buildPythonPackage rec {
  pname = "mlflow";
  version = "2.14.3";
  version = "2.16.2";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-KSyuS4NXSgyyIxF+IkyqZ5iTMHivAjNxnCthK+pkVhc=";
  src = fetchFromGitHub {
    owner = "mlflow";
    repo = "mlflow";
    rev = "refs/tags/v${version}";
    hash = "sha256-7W1gpVgJSN/iXoW987eCHfcOeE3D/ZJ2W/eilDdzOww=";
  };

  # Remove currently broken dependency `shap`, a model explainability package.
  # This seems quite unprincipled especially with tests not being enabled,
  # but note mlflow has a 'skinny' install option which does not require `shap`.
  pythonRemoveDeps = [ "shap" ];
  pythonRelaxDeps = [
    "gunicorn"
    "packaging"
    "pytz"
    "pyarrow"
  build-system = [
    setuptools
  ];

  propagatedBuildInputs = [
  dependencies = [
    alembic
    cachetools
    click
    cloudpickle
    databricks-cli
    databricks-sdk
    docker
    entrypoints
    flask
    gitpython
    gorilla
    graphene
    gunicorn
    importlib-metadata
    jinja2
    markdown
    matplotlib
    numpy
@@ -83,36 +104,87 @@ buildPythonPackage rec {
    opentelemetry-sdk
    packaging
    pandas
    prometheus-flask-exporter
    protobuf
    pyarrow
    python-dateutil
    pytz
    pyyaml
    querystring-parser
    requests
    scikit-learn
    scipy
    setuptools
    #shap
    simplejson
    sqlalchemy
    sqlparse
  ];

  pythonImportsCheck = [ "mlflow" ];

  # no tests in PyPI dist
  # run into https://stackoverflow.com/questions/51203641/attributeerror-module-alembic-context-has-no-attribute-config
  # also, tests use conda so can't run on NixOS without buildFHSEnv
  nativeCheckInputs = [
    aiohttp
    azure-core
    azure-storage-blob
    azure-storage-file
    boto3
    botocore
    catboost
    datasets
    fastapi
    google-cloud-storage
    httpx
    jwt
    keras
    langchain
    librosa
    moto
    opentelemetry-exporter-otlp
    optuna
    pydantic
    pyspark
    pytestCheckHook
    pytorch-lightning
    sentence-transformers
    starlette
    statsmodels
    tensorflow
    torch
    transformers
    uvicorn
    xgboost
  ];

  disabledTestPaths = [
    # Requires unpackaged `autogen`
    "tests/autogen/test_autogen_autolog.py"

    # Requires unpackaged `diviner`
    "tests/diviner/test_diviner_model_export.py"

    # Requires unpackaged `sktime`
    "examples/sktime/test_sktime_model_export.py"

    # Requires `fastai` which would cause a circular dependency
    "tests/fastai/test_fastai_autolog.py"
    "tests/fastai/test_fastai_model_export.py"

    # Requires `spacy` which would cause a circular dependency
    "tests/spacy/test_spacy_model_export.py"

    # Requires `tensorflow.keras` which is not included in our outdated version of `tensorflow` (2.13.0)
    "tests/gateway/providers/test_ai21labs.py"
    "tests/tensorflow/test_keras_model_export.py"
    "tests/tensorflow/test_keras_pyfunc_model_works_with_all_input_types.py"
    "tests/tensorflow/test_mlflow_callback.py"
  ];

  # I (@GaetanLepage) gave up at enabling tests:
  # - They require a lot of dependencies (some unpackaged);
  # - Many errors occur at collection time;
  # - Most (all ?) tests require internet access anyway.
  doCheck = false;

  meta = with lib; {
  meta = {
    description = "Open source platform for the machine learning lifecycle";
    mainProgram = "mlflow";
    homepage = "https://github.com/mlflow/mlflow";
    changelog = "https://github.com/mlflow/mlflow/blob/v${version}/CHANGELOG.md";
    license = licenses.asl20;
    maintainers = with maintainers; [ tbenst ];
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ tbenst ];
  };
}
+23 −20
Original line number Diff line number Diff line
{ python3, writeText}:
{ python3Packages, writers }:

let
  py = python3.pkgs;
in
py.toPythonApplication
  (py.mlflow.overridePythonAttrs(old: rec {

    propagatedBuildInputs = old.propagatedBuildInputs ++ [
      py.boto3
      py.mysqlclient
    ];

    postPatch = (old.postPatch or "") + ''
      substituteInPlace mlflow/utils/process.py --replace \
        "child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True," \
        "cmd[0]='$out/bin/gunicornMlflow'; child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True,"
    '';
  py = python3Packages;

    gunicornScript = writeText "gunicornMlflow"
    ''
        #!/usr/bin/env python
  gunicornScript = writers.writePython3 "gunicornMlflow" { } ''
    import re
    import sys
    from gunicorn.app.wsgiapp import run
@@ -27,10 +11,29 @@ py.toPythonApplication
        sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', ''', sys.argv[0])
        sys.exit(run())
  '';
in
py.toPythonApplication (
  py.mlflow.overridePythonAttrs (old: {

    propagatedBuildInputs = old.dependencies ++ [
      py.boto3
      py.mysqlclient
    ];

    postPatch =
      (old.postPatch or "")
      + ''
        cat mlflow/utils/process.py

        substituteInPlace mlflow/utils/process.py --replace-fail \
          "process = subprocess.Popen(" \
          "cmd[0]='${gunicornScript}'; process = subprocess.Popen("
      '';

    postInstall = ''
      gpath=$out/bin/gunicornMlflow
      cp ${gunicornScript} $gpath
      chmod 555 $gpath
    '';
}))
  })
)
+2 −0
Original line number Diff line number Diff line
@@ -2886,6 +2886,8 @@ self: super: with self; {
  databricks-sql-connector = callPackage ../development/python-modules/databricks-sql-connector { };
  databricks-sdk = callPackage ../development/python-modules/databricks-sdk { };
  dataclass-factory = callPackage ../development/python-modules/dataclass-factory { };
  dataclass-wizard = callPackage ../development/python-modules/dataclass-wizard { };