Commit 13e11d05 authored by Gaetan Lepage
Browse files
parent beb50371
Loading
Loading
Loading
Loading
+31 −47
Original line number Diff line number Diff line
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  rustPlatform,
  pytestCheckHook,
  libiconv,
  numpy,
  protobuf,

  # nativeBuildInputs
  protoc,

  # buildInputs
  protobuf,

  # dependencies
  pyarrow,
  typing-extensions,
  pythonOlder,
}:

let
  # Checkout of apache/arrow-testing pinned to a fixed commit (`rev`).
  # preCheck symlinks this store path as ./testing, where it serves as
  # test assets for the check phase.
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
    hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
  };

  # Checkout of apache/parquet-testing pinned to a fixed commit (`rev`).
  # preCheck symlinks this store path as ./parquet, where it serves as
  # test assets for the check phase.
  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "50af3d8ce206990d81014b1862e5ce7380dc3e08";
    hash = "sha256-edyv/r5olkj09aHtm8LHZY0b3jUtLNUcufwI41qKYaY=";
  };
in
  # tests
  numpy,
  pytest-asyncio,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "datafusion";
  version = "40.1.0";
  version = "48.0.0";
  pyproject = true;

  src = fetchFromGitHub {
@@ -42,13 +30,14 @@ buildPythonPackage rec {
    owner = "apache";
    repo = "arrow-datafusion-python";
    tag = version;
    hash = "sha256-5WOSlx4XW9zO6oTY16lWQElShLv0ubflVPfSSEGrFgg=";
    # Fetch arrow-testing and parquet-testing (tests assets)
    fetchSubmodules = true;
    hash = "sha256-9IOkb31f4nFo9mWTr+z5ZG8xSXIZSgW3vCBgLaGxpfI=";
  };

  cargoDeps = rustPlatform.fetchCargoVendor {
    name = "datafusion-cargo-deps";
    inherit src;
    hash = "sha256-xUpchV4UFEX1HkCpClOwxnEfGLVlOIX4UmzYKiUth9U=";
    inherit pname src version;
    hash = "sha256-P9NFvhHAGgYIi36CHEPZPr8hmMNp5zrCcmE7NHx51k4=";
  };

  nativeBuildInputs = with rustPlatform; [
@@ -59,9 +48,6 @@ buildPythonPackage rec {

  buildInputs = [
    protobuf
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    libiconv
  ];

  dependencies = [
@@ -70,28 +56,26 @@ buildPythonPackage rec {
  ];

  nativeCheckInputs = [
    pytestCheckHook
    numpy
    pytest-asyncio
    pytestCheckHook
  ];

  pythonImportsCheck = [ "datafusion" ];

  pytestFlags = [
    "--pyargs"
    pname
  pythonImportsCheck = [
    "datafusion"
    "datafusion._internal"
  ];

  preCheck = ''
    pushd $TMPDIR
    ln -s ${arrow-testing} ./testing
    ln -s ${parquet-testing} ./parquet
    rm -rf python/datafusion
  '';

  postCheck = ''
    popd
  '';
  disabledTests = [
    # Exception: DataFusion error (requires internet access)
    "test_register_http_csv"
  ];

  meta = with lib; {
  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
@@ -99,7 +83,7 @@ buildPythonPackage rec {
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md";
    license = with licenses; [ asl20 ];
    maintainers = with maintainers; [ cpcloud ];
    license = with lib.licenses; [ asl20 ];
    maintainers = with lib.maintainers; [ cpcloud ];
  };
}