Unverified Commit 36f60e18 authored by Wolfgang Walther's avatar Wolfgang Walther Committed by GitHub
Browse files

apache-airflow: 2.7.3 -> 2.10.5 (#396544)

parents d15e5233 87d364ec
Loading
Loading
Loading
Loading
+189 −13
Original line number Diff line number Diff line
{
  fetchFromGitHub,
  fetchpatch,
  fetchPypi,
  python3,
}:
@@ -8,12 +9,12 @@ let
  python = python3.override {
    self = python;
    packageOverrides = pySelf: pySuper: {
      connexion = pySuper.connexion.overridePythonAttrs (o: rec {
      connexion = pySuper.connexion.overridePythonAttrs rec {
        version = "2.14.2";
        src = fetchFromGitHub {
          owner = "spec-first";
          repo = "connexion";
          rev = "refs/tags/${version}";
          tag = version;
          hash = "sha256-1v1xCHY3ZnZG/Vu9wN/it7rLKC/StoDefoMNs+hMjIs=";
        };
        nativeBuildInputs = with pySelf; [
@@ -40,15 +41,44 @@ let
          aiohttp-remotes
          decorator
          pytest-aiohttp
          pytest-tornasync
          pytestCheckHook
          testfixtures
        ];
        disabledTests = [
          "test_aiohttp_simple_api"
          "test_app"
          "test_invalid_type" # https://github.com/spec-first/connexion/issues/1969
          "test_openapi_yaml_behind_proxy"
          "test_run_with_wsgi_containers"
          "test_swagger_ui"
        ];
      });
        postPatch = ''
          substituteInPlace connexion/__init__.py \
            --replace "2020.0.dev1" "${version}"
        '';
      };
      # Pin werkzeug to 2.3.8, built from the PyPI sdist.
      werkzeug = pySuper.werkzeug.overridePythonAttrs (
        let
          version = "2.3.8";
        in
        {
          inherit version;
          src = fetchPypi {
            inherit version;
            pname = "werkzeug";
            hash = "sha256-VUslfHS763oNJUFgpPj/4YUkP1KlIDUGC3Ycpi2XfwM=";
          };
          # replaces the check inputs of the base package
          nativeCheckInputs = [ pySelf.pytest-xprocess ];
        }
      );
      # Pin click to 8.1.8, which flask's test-suite needs:
      #   TypeError: CliRunner.__init__() got an unexpected keyword argument 'mix_stderr'
      click = pySuper.click.overridePythonAttrs (
        let
          version = "8.1.8";
        in
        {
          inherit version;
          src = fetchPypi {
            inherit version;
            pname = "click";
            hash = "sha256-7VPJ2JkNg8Kifermjk7jN0c/YzDAQKMdQiXJV00WCWo=";
          };
        }
      );
      flask = pySuper.flask.overridePythonAttrs (o: rec {
        version = "2.2.5";
        src = fetchPypi {
@@ -59,22 +89,67 @@ let
        nativeBuildInputs = (o.nativeBuildInputs or [ ]) ++ [
          pySelf.setuptools
        ];
        pytestFlagsArray = [
          # tests that are marked with filterwarnings fail with
          # DeprecationWarning: 'pkgutil.get_loader' is deprecated and slated for
          # removal in Python 3.14; use importlib.util.find_spec() instead
          "-W ignore::DeprecationWarning"
        ];
      });
      # Pin flask-login to 0.6.3, fetched from the upstream git tag.
      flask-login = pySuper.flask-login.overridePythonAttrs rec {
        version = "0.6.3";
        nativeBuildInputs = [ pySelf.setuptools ];
        src = fetchFromGitHub {
          repo = "flask-login";
          owner = "maxcountryman";
          tag = version;
          hash = "sha256-Sn7Ond67P/3+OmKKFE/KfA6FE4IajhiRXVVrXKJtY3I=";
        };
        pytestFlagsArray = [
          # Silence deprecation warnings during the test run:
          #   DeprecationWarning: datetime.datetime.utcnow() is deprecated
          #   and scheduled for removal in a future version.
          #   Use timezone-aware objects to represent datetimes in UTC:
          #   datetime.datetime.now(datetime.UTC).
          "-W ignore::DeprecationWarning"
        ];
      };
      # Pin flask-session to 0.5.0, fetched from the upstream git tag.
      flask-session = pySuper.flask-session.overridePythonAttrs rec {
        version = "0.5.0";
        src = fetchFromGitHub {
          # the project is hosted in the "pallets-eco" GitHub organisation
          owner = "pallets-eco";
          repo = "flask-session";
          tag = version;
          hash = "sha256-t8w6ZS4gBDpnnKvL3DLtn+rRLQNJbrT2Hxm4f3+a3Xc=";
        };
        nativeCheckInputs = with pySelf; [ pytestCheckHook ];
        pytestFlagsArray = [
          # only select tests whose names match null_session or filesystem_session
          "-k"
          "'null_session or filesystem_session'"
        ];
        dependencies = with pySelf; [
          flask-sqlalchemy
          cachelib
        ];
        # NOTE(review): the empty values below presumably reset test settings
        # inherited from the base flask-session package -- confirm.
        disabledTests = [ ];
        disabledTestPaths = [ ];
        preCheck = "";
        postCheck = "";
      };
      # flask-appbuilder doesn't work with sqlalchemy 2.x, flask-appbuilder 3.x
      # https://github.com/dpgaspar/Flask-AppBuilder/issues/2038
      flask-appbuilder = pySuper.flask-appbuilder.overridePythonAttrs (o: {
      flask-appbuilder = pySuper.flask-appbuilder.overridePythonAttrs {
        meta.broken = false;
      });
      };
      # a knock-on effect from overriding the sqlalchemy version
      flask-sqlalchemy = pySuper.flask-sqlalchemy.overridePythonAttrs (o: {
      flask-sqlalchemy = pySuper.flask-sqlalchemy.overridePythonAttrs {
        src = fetchPypi {
          pname = "Flask-SQLAlchemy";
          version = "2.5.1";
          hash = "sha256-K9pEtD58rLFdTgX/PMH4vJeTbMRkYjQkECv8LDXpWRI=";
        };
        format = "setuptools";
      });
      httpcore = pySuper.httpcore.overridePythonAttrs (o: {
      };
      httpcore = pySuper.httpcore.overridePythonAttrs {
        # nullify upstream's pytest flags which cause
        # "TLS/SSL connection has been closed (EOF)"
        # with pytest-httpbin 1.x
@@ -82,20 +157,121 @@ let
          substituteInPlace pyproject.toml \
            --replace '[tool.pytest.ini_options]' '[tool.notpytest.ini_options]'
        '';
      });
      pytest-httpbin = pySuper.pytest-httpbin.overridePythonAttrs (o: rec {
      };
      pytest-httpbin = pySuper.pytest-httpbin.overridePythonAttrs rec {
        version = "1.0.2";
        src = fetchFromGitHub {
          owner = "kevin1024";
          repo = "pytest-httpbin";
          rev = "refs/tags/v${version}";
          tag = "v${version}";
          hash = "sha256-S4ThQx4H3UlKhunJo35esPClZiEn7gX/Qwo4kE1QMTI=";
        };
      });
        # Use unmerged PR #65 to fix older version:
        #   https://github.com/kevin1024/pytest-httpbin/pull/65/
        # It was closed in favour of another which isn't compatible with the overridden version.
        patches = [
          (fetchpatch {
            url = "https://github.com/kevin1024/pytest-httpbin/commit/4e325f877ff8f77dec9f380bd8e53bb42976775c.patch";
            hash = "sha256-a33XcdMupD+7ZzvUibePdldGImmPLDNU2sxRbwpveDA=";
          })
          (fetchpatch {
            url = "https://github.com/kevin1024/pytest-httpbin/commit/463afb9b200563ac6fe7ae535f7a7a3c818b0418.patch";
            hash = "sha256-HFmuLtAtEjnB6heSG1YNnqxtz2phXNkHbQaZyB5bLJs=";
          })
        ];
        # Tests skipped for the pinned pytest-httpbin.
        # NOTE(review): judging by their names these all exercise the
        # https/"secure" server -- confirm the underlying failure.
        disabledTests = [
          "test_httpbin_secure_accepts_get_requests"
          "test_httpbin_secure_accepts_lots_of_get_requests"
          "test_httpbin_both[https]"
          "test_chunked_encoding[https]"
          "TestClassBassedTests::test_http_secure"
          "test_dont_crash_on_certificate_problems"
          "test_redirect_location_is_https_for_secure_server"
          "test_http_secure"
        ];
      };
      # apache-airflow doesn't work with sqlalchemy 2.x
      # https://github.com/apache/airflow/issues/28723
      sqlalchemy = pySuper.sqlalchemy_1_4;

      # Pin gitpython to 3.1.44, fetched from the upstream git tag.
      gitpython = pySuper.gitpython.overridePythonAttrs rec {
        version = "3.1.44";
        src = fetchFromGitHub {
          owner = "gitpython-developers";
          repo = "gitpython";
          # use `tag` like the other overrides in this file instead of a bare rev
          tag = version;
          hash = "sha256-KnKaBv/tKk4wiGWUWCEgd1vgrTouwUhqxJ1/nMjRaWk=";
        };
      };
      # ValueError: Unknown classifier in field `project.classifiers`: Programming Language :: Python :: Free Threading :: 2 - Beta
      urllib3 = pySuper.urllib3.overridePythonAttrs rec {
        version = "2.5.0";
        # strip the setuptools-scm entry from pyproject.toml; --replace-fail
        # aborts the build if the expected text is not present
        postPatch = ''
          substituteInPlace pyproject.toml \
            --replace-fail ', "setuptools-scm>=8,<9"' ""
        '';
        src = fetchPypi {
          inherit version;
          pname = "urllib3";
          hash = "sha256-P8R3M8fkGdS8P2s9wrT4kLt0OQajDVa6Slv6S7/5J2A=";
        };
      };
      # Pin smmap to 5.0.2, fetched from the upstream git tag.
      smmap = pySuper.smmap.overridePythonAttrs rec {
        version = "5.0.2";
        src = fetchFromGitHub {
          owner = "gitpython-developers";
          repo = "smmap";
          # `tag` expands to refs/tags/<tag>, matching the other overrides in this file
          tag = "v${version}";
          hash = "sha256-0Y175kjv/8UJpSxtLpWH4/VT7JrcVPAq79Nf3rtHZZM=";
        };
      };
      # Pin trove-classifiers to 2024.10.21.16, built from the PyPI sdist.
      trove-classifiers = pySuper.trove-classifiers.overridePythonAttrs (
        let
          version = "2024.10.21.16";
        in
        {
          inherit version;
          src = fetchPypi {
            inherit version;
            pname = "trove_classifiers";
            hash = "sha256-F8vQVdZ9Xp2d5jKTqHMpQ/q8IVdOTHt07fEStJKM9fM=";
          };
          # NOTE(review): presumably clears a postPatch of the base package -- confirm
          postPatch = "";
        }
      );
      # Pin packaging to 24.2, built from the PyPI sdist.
      packaging = pySuper.packaging.overridePythonAttrs (
        let
          version = "24.2";
        in
        {
          inherit version;
          src = fetchPypi {
            inherit version;
            pname = "packaging";
            hash = "sha256-wiim3F6TLTRrxXOTeRCdSeiFPdgiNXHHxbVSYO3AuX8=";
          };
        }
      );
      # Pin pluggy to 1.5.0, fetched from the upstream git tag.
      pluggy = pySuper.pluggy.overridePythonAttrs (
        let
          version = "1.5.0";
        in
        {
          inherit version;
          src = fetchFromGitHub {
            repo = "pluggy";
            owner = "pytest-dev";
            tag = version;
            hash = "sha256-f0DxyZZk6RoYtOEXLACcsOn2B+Hot4U4g5Ogr/hKmOE=";
          };
        }
      );
      # Pin pyproject-api to 1.8.0, built from the PyPI sdist.
      pyproject-api = pySuper.pyproject-api.overridePythonAttrs rec {
        version = "1.8.0";
        disabledTests = [
          # AssertionError: assert ['magic>3', 'requests>2'] == ['magic >3', 'requests >2']
          "test_frontend_setuptools"
        ];
        src = fetchPypi {
          inherit version;
          pname = "pyproject_api";
          hash = "sha256-d7gEny/rXTPu/MIbV/HieWNid6isita1hxA3skN3hJY=";
        };
      };
      # Pin tox to 4.27.0, fetched from the upstream git tag.
      tox = pySuper.tox.overridePythonAttrs (
        let
          version = "4.27.0";
        in
        {
          inherit version;
          src = fetchFromGitHub {
            repo = "tox";
            owner = "tox-dev";
            tag = version;
            hash = "sha256-Z3qUK4w1ebPvdZD4ZuKgZXJPUu5lG0G41vn/pc9gC/0=";
          };
        }
      );
      apache-airflow = pySelf.callPackage ./python-package.nix { };
    };
  };
+868 −109

File changed.

Preview size limit exceeded, changes collapsed.

+133 −128
Original line number Diff line number Diff line
{
  lib,
  stdenv,
  python,
  buildPythonPackage,
  fetchFromGitHub,
  writeScript,

  # javascript
  fetchYarnDeps,
  nodejs,
  webpack-cli,
  yarnBuildHook,
  yarnConfigHook,

  # python
  alembic,
  argcomplete,
  asgiref,
  attrs,
  blinker,
  cached-property,
  cattrs,
  clickclick,
  buildPythonPackage,
  colorlog,
  configupdater,
  connexion,
  cron-descriptor,
  croniter,
  cryptography,
  deprecated,
  dill,
  flask,
  flask-login,
  flask-appbuilder,
  flask-caching,
  flask-login,
  flask-session,
  flask-wtf,
  fsspec,
  gitdb,
  gitpython,
  google-re2,
  graphviz,
  gunicorn,
  httpx,
  iso8601,
  importlib-resources,
  importlib-metadata,
  inflection,
  itsdangerous,
  jinja2,
  jsonschema,
  hatchling,
  lazy-object-proxy,
  linkify-it-py,
  lockfile,
  markdown,
  markupsafe,
  marshmallow-oneofschema,
  mdit-py-plugins,
  numpy,
  openapi-spec-validator,
  methodtools,
  opentelemetry-api,
  opentelemetry-exporter-otlp,
  packaging,
  pandas,
  pathspec,
  pendulum,
  pluggy,
  psutil,
  pydantic,
  pygments,
  pyjwt,
  pytest-asyncio,
  pytestCheckHook,
  python,
  python-daemon,
  python-dateutil,
  python-nvd3,
  python-slugify,
  python3-openid,
  pythonOlder,
  pyyaml,
  rich,
  rich-argparse,
  setproctitle,
  smmap,
  sqlalchemy,
  sqlalchemy-jsonfield,
  swagger-ui-bundle,
  tabulate,
  tenacity,
  termcolor,
  typing-extensions,
  unicodecsv,
  werkzeug,
  freezegun,
  pytest-asyncio,
  pytestCheckHook,
  time-machine,
  yarnConfigHook,
  yarnBuildHook,
  nodejs,
  webpack-cli,
  fetchYarnDeps,
  writeScript,
  tomli,
  trove-classifiers,
  universal-pathlib,

  # Extra airflow providers to enable
  enabledProviders ? [ ],
}:
let
  version = "2.7.3";
  version = "2.10.5";

  airflow-src = fetchFromGitHub {
    owner = "apache";
@@ -99,7 +75,7 @@ let
    # Download using the git protocol rather than using tarballs, because the
    # GitHub archive tarballs don't appear to include tests
    forceFetchGit = true;
    hash = "sha256-+YbiKFZLigSDbHPaUKIl97kpezW1rIt/j09MMa6lwhQ=";
    hash = "sha256-q5/CM+puXE31+15F3yZmcrR74LrqHppdCDUqjLQXPfk=";
  };

  # airflow bundles a web interface, which is built using webpack by an undocumented shell script in airflow's source tree.
@@ -112,7 +88,7 @@ let

    offlineCache = fetchYarnDeps {
      yarnLock = "${src}/yarn.lock";
      hash = "sha256-WQKuQgNp35fU6z7owequXOSwoUGJDJYcUgkjPDMOops=";
      hash = "sha256-hKgtMH4c8sPRDLPLVn+H8rmwc2Q6ei6U4er6fGuFn4I=";
    };

    nativeBuildInputs = [
@@ -122,10 +98,10 @@ let
      webpack-cli
    ];

    # The webpack license plugin tries to create /licenses when given the
    # The webpack license plugin tries to create /3rd-party-licenses when given the
    # original relative path
    postPatch = ''
      sed -i 's!../../../../licenses/LICENSES-ui.txt!licenses/LICENSES-ui.txt!' webpack.config.js
      sed -i 's!../../../../3rd-party-licenses/LICENSES-ui.txt!/3rd-party-licenses/LICENSES-ui.txt!' webpack.config.js
    '';

    postBuild = ''
@@ -138,129 +114,165 @@ let
    '';
  };

  # Providers that are always packaged via buildProvider, in addition to the
  # user-selected enabledProviders (see providerPackages).
  # Names use "_" as separator; getProviderPath maps it to "/" to locate the
  # provider below airflow/providers/.
  requiredProviders = [
    "common_compat"
    "common_io"
    "common_sql"
    "fab"
    "ftp"
    "http"
    "imap"
    "smtp"
    "sqlite"
  ];

  # Generated metadata describing each provider's dependencies and imports.
  # Additional providers are switched on through enabledProviders above.
  providers = import ./providers.nix;
  # provider name -> path below airflow/providers/ ("_" becomes "/")
  getProviderPath = lib.replaceStrings [ "_" ] [ "/" ];
  # provider name -> list of python packages named in its `deps` metadata
  getProviderDeps =
    provider:
    let
      depNames = providers.${provider}.deps;
    in
    map (name: python.pkgs.${name}) depNames;
  # provider name -> its `imports` metadata
  getProviderImports = provider: providers.${provider}.imports;
  # the two lookups above, flattened over all enabled providers
  providerDependencies = lib.concatMap getProviderDeps enabledProviders;
  providerImports = lib.concatMap getProviderImports enabledProviders;

  # Package a single airflow provider as its own Python package.
  #
  # `provider` is a provider name with "_" separators (e.g. an entry of
  # requiredProviders). The provider's directory is copied out of airflow-src
  # into site-packages and a minimal egg-info/PKG-INFO is written next to it.
  buildProvider =
    provider:
    let
      # path of the provider below airflow/providers/ ("_" replaced by "/")
      providerPath = getProviderPath provider;
    in
    python.pkgs.buildPythonPackage {
      pname = "apache-airflow-providers-${provider}";
      version = "unstable"; # will be extracted in the build phase
      pyproject = false; # providers packages don't have pyproject.toml nor setup.py

      src = airflow-src;

      # python packages listed in the provider's `deps` metadata
      propagatedBuildInputs = getProviderDeps provider;
      dependencies = [ packaging ];

      # Fails the build when the provider's __init__.py is missing; otherwise
      # reads __version__ into the shell variable `version` and prints it.
      # The grep pattern only matches a single-quoted `__version__ = '...'`
      # assignment and falls back to "0.0.0" when nothing matches.
      buildPhase = ''
        # extract version from the provider's __init__.py file
        if [ -f "airflow/providers/${providerPath}/__init__.py" ]; then
          version=$(grep -oP "(?<=__version__ = ')[^']+" "airflow/providers/${providerPath}/__init__.py" || echo "0.0.0")
          echo "Provider ${provider} version: $version"
        else
          echo "Error: __init__.py not found for provider ${provider} at path airflow/providers/${providerPath}"
          exit 1
        fi
      '';

      # NOTE(review): the PKG-INFO "Version:" line below is filled in by Nix
      # interpolation of the enclosing let-bound `version`, not by the shell
      # variable set in buildPhase -- confirm this is intended.
      installPhase = ''
                      # create directory structure
                      mkdir -p $out/${python.sitePackages}/airflow/providers

                      # copy the provider directory
                      if [ -d "airflow/providers/${providerPath}" ]; then
                        mkdir -p $out/${python.sitePackages}/airflow/providers/$(dirname "${providerPath}")
                        cp -r airflow/providers/${providerPath} $out/${python.sitePackages}/airflow/providers/$(dirname "${providerPath}")

                        # create parent __init__.py files
                        touch $out/${python.sitePackages}/airflow/__init__.py
                        touch $out/${python.sitePackages}/airflow/providers/__init__.py

                        # create any needed intermediate __init__.py files for nested providers
                        providerDir=$(dirname "${providerPath}")
                        while [ "$providerDir" != "." ] && [ -n "$providerDir" ]; do
                          mkdir -p $out/${python.sitePackages}/airflow/providers/$providerDir
                          touch $out/${python.sitePackages}/airflow/providers/$providerDir/__init__.py
                          providerDir=$(dirname "$providerDir")
                        done

                        # create egg-info for package discovery
                        mkdir -p $out/${python.sitePackages}/apache_airflow_providers_${provider}.egg-info
                        cat > $out/${python.sitePackages}/apache_airflow_providers_${provider}.egg-info/PKG-INFO <<EOF
        Metadata-Version: 2.1
        Name: apache-airflow-providers-${lib.replaceStrings [ "_" ] [ "-" ] provider}
        Version: ${version}
        Summary: Apache Airflow Provider for ${provider}
        EOF
                      else
                        echo "Provider directory not found: airflow/providers/${providerPath}"
                        exit 1
                      fi
      '';
    };

  # One buildProvider package per required provider, followed by one per
  # provider requested through enabledProviders.
  providerPackages = map buildProvider (requiredProviders ++ enabledProviders);

in
buildPythonPackage rec {
  pname = "apache-airflow";
  inherit version;
  format = "setuptools";
  src = airflow-src;
  pyproject = true;

  disabled = pythonOlder "3.7";
  nativeBuildInputs = [ hatchling ];

  propagatedBuildInputs = [
  dependencies = [
    alembic
    argcomplete
    asgiref
    attrs
    blinker
    cached-property
    cattrs
    clickclick
    colorlog
    configupdater
    connexion
    cron-descriptor
    croniter
    cryptography
    deprecated
    dill
    flask
    flask-appbuilder
    flask-caching
    flask-session
    flask-wtf
    flask-login
    flask-session
    fsspec
    gitdb
    gitpython
    google-re2
    graphviz
    gunicorn
    httpx
    iso8601
    importlib-resources
    inflection
    itsdangerous
    jinja2
    jsonschema
    lazy-object-proxy
    linkify-it-py
    lockfile
    markdown
    markupsafe
    marshmallow-oneofschema
    mdit-py-plugins
    numpy
    openapi-spec-validator
    methodtools
    opentelemetry-api
    opentelemetry-exporter-otlp
    packaging
    pandas
    pathspec
    pendulum
    pluggy
    psutil
    pydantic
    pygments
    pyjwt
    python-daemon
    python-dateutil
    python-nvd3
    python-slugify
    python3-openid
    pyyaml
    rich
    rich-argparse
    setproctitle
    smmap
    sqlalchemy
    sqlalchemy-jsonfield
    swagger-ui-bundle
    tabulate
    tenacity
    termcolor
    typing-extensions
    unicodecsv
    werkzeug
  ]
  ++ lib.optionals (pythonOlder "3.9") [
    importlib-metadata
    tomli
    trove-classifiers
    universal-pathlib
  ]
  ++ providerDependencies;

  buildInputs = [
    airflow-frontend
  ];
  ++ providerPackages;

  nativeCheckInputs = [
    freezegun
    pytest-asyncio
    pytestCheckHook
    time-machine
    marshmallow-oneofschema
  ];

  # By default, source code of providers is included but unusable due to missing
  # transitive dependencies. To enable a provider, add it to enabledProviders
  # above
  INSTALL_PROVIDERS_FROM_SOURCES = "true";
  checkPhase = ''
    export PYTEST_ADDOPTS="--asyncio_default_fixture_loop_scope=cache"
  '';

  postPatch = ''
    # https://github.com/apache/airflow/issues/33854
    substituteInPlace pyproject.toml \
      --replace '[project]' $'[project]\nname = "apache-airflow"\nversion = "${version}"'
  ''
  + lib.optionalString stdenv.hostPlatform.isDarwin ''
    # Fix failing test on Hydra
    substituteInPlace airflow/utils/db.py \
      --replace "/tmp/sqlite_default.db" "$TMPDIR/sqlite_default.db"
      --replace-fail "hatchling==1.27.0" "hatchling" \
      --replace-fail "\"/airflow/providers/\"," ""
  '';

  pythonRelaxDeps = [
    "apache-airflow-providers-fab" # fab provider package has wrong version
    "colorlog"
    "flask-appbuilder"
    "opentelemetry-api"
    "pathspec"
  ];

@@ -285,10 +297,6 @@ buildPythonPackage rec {
    export AIRFLOW__CORE__UNIT_TEST_MODE=True
    export AIRFLOW_DB="$HOME/airflow.db"
    export PATH=$PATH:$out/bin

    airflow version
    airflow db init
    airflow db reset -y
  '';

  enabledTestPaths = [
@@ -325,25 +333,22 @@ buildPythonPackage rec {
  # You can (manually) test the web UI as follows:
  #
  #   nix shell .#apache-airflow
  #   airflow version
  #   airflow db reset  # WARNING: this will wipe any existing db state you might have!
  #   airflow db init
  #   airflow standalone
  #
  # Then navigate to the localhost URL using the credentials printed, try
  # triggering the 'example_bash_operator' and 'example_bash_operator' DAGs and
  # see if they report success.
  # triggering the 'example_bash_operator' DAG and see if it reports success.

  meta = {
    description = "Programmatically author, schedule and monitor data pipelines";
    homepage = "https://airflow.apache.org/";
    changelog = "https://airflow.apache.org/docs/apache-airflow/${version}/release_notes.html";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [
      gbpdt
      ingenieroariel
    ];
    knownVulnerabilities = [
      "CVE-2023-50943"
      "CVE-2023-50944"
      taranarmo
    ];
  };
}
+2 −2
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ EXTRA_REQS = {


def get_version():
    """Return the airflow version declared in ``python-package.nix``.

    The Nix file is looked up in the directory of this script
    (``os.path.dirname(sys.argv[0])``) and the first ``version = "...";``
    assignment consisting of digits, dots and "b" is returned.

    Raises:
        ValueError: if the file contains no such version assignment.
    """
    # os.path.join keeps the path relative when the script is started without a
    # directory component (dirname == ""); plain string concatenation would
    # point at "/python-package.nix" in the filesystem root instead.
    nix_file = os.path.join(os.path.dirname(sys.argv[0]), "python-package.nix")
    with open(nix_file) as fh:
        # A version consists of digits, dots, and possibly a "b" (for beta)
        m = re.search('version = "([\\d\\.b]+)";', fh.read())
    if m is None:
        raise ValueError(f"no version assignment found in {nix_file}")
    return m.group(1)
@@ -47,7 +47,7 @@ def get_file_from_github(version: str, path: str):


def repository_root() -> Path:
    """Return the repository root as a path relative to this script.

    The result is the script's directory (``os.path.dirname(sys.argv[0])``)
    followed by four ``..`` components; it is not resolved or normalised.
    """
    return Path(os.path.dirname(sys.argv[0])) / "../../../.."


def dump_packages() -> Dict[str, Dict[str, str]]: