Unverified commit b26cc12a, authored by Doron Behar and committed by GitHub
Browse files

py-wacz: init at 0.5.0 (#420437)

parents 6106c710 116c9317
Loading
Loading
Loading
Loading
+57 −0
Original line number Diff line number Diff line
# Package expression for cdxj-indexer ("CDXJ Indexing of WARC/ARCs"),
# built from the upstream GitHub tag using setuptools.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  warcio,
  surt,
  py3amf,
  multipart,

  # tests
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "cdxj-indexer";
  version = "1.4.6";
  pyproject = true;

  # Sources come from the `v<version>` tag of the upstream repository.
  src = fetchFromGitHub {
    owner = "webrecorder";
    repo = "cdxj-indexer";
    tag = "v${version}";
    hash = "sha256-E3b/IfjngyXhWvRYP9CkQGvBFeC8pAm4KxZA9MwOo4s=";
  };

  build-system = [ setuptools ];

  dependencies = [
    warcio
    surt
    py3amf
    multipart
  ];

  # `idna` is only a transitive dependency and does not need to be pinned,
  # so the requirement is stripped from the package metadata.
  # Upstream fix proposed in <https://github.com/webrecorder/cdxj-indexer/pull/25>
  pythonRemoveDeps = [ "idna" ];

  nativeCheckInputs = [ pytestCheckHook ];

  # Smoke test: the top-level module must be importable after the build.
  pythonImportsCheck = [ "cdxj_indexer" ];

  meta = {
    description = "CDXJ Indexing of WARC/ARCs";
    homepage = "https://github.com/webrecorder/cdxj-indexer";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.zhaofengli ];
    mainProgram = "cdxj-indexer";
  };
}
+48 −0
Original line number Diff line number Diff line
# Package expression for ezodf, a library to extract, add, modify, or delete
# document data in OpenDocument (ODF) files.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  lxml,

  # tests
  unittestCheckHook,
}:

buildPythonPackage rec {
  pname = "ezodf";
  version = "0.3.2";
  pyproject = true;

  # Upstream tags are the bare version number (no `v` prefix).
  src = fetchFromGitHub {
    owner = "T0ha";
    repo = "ezodf";
    tag = version;
    hash = "sha256-d66CTj9CpCnMICqNdUP07M9elEfoxuPg8x1kxqgXTTE=";
  };

  build-system = [ setuptools ];

  dependencies = [ lxml ];

  nativeCheckInputs = [ unittestCheckHook ];

  # Extra argument handed to the unittest hook; presumably the directory
  # in which test discovery starts - confirm against the hook's documentation.
  unittestFlags = [ "tests" ];

  # Smoke test: the top-level module must be importable after the build.
  pythonImportsCheck = [ "ezodf" ];

  meta = {
    description = "Extract, add, modify, or delete document data in OpenDocument (ODF) files";
    homepage = "https://github.com/T0ha/ezodf";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.zhaofengli ];
  };
}
+215 −0
Original line number Diff line number Diff line
# Package expression for frictionless, a data management framework for Python
# that describes, extracts, validates, and transforms tabular data.
#
# Upstream splits support for individual formats into extras; the ones whose
# dependencies are all packaged are exposed through `optional-dependencies`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  hatchling,
  attrs,
  chardet,
  humanize,
  isodate,
  jinja2,
  jsonschema,
  marko,
  petl,
  pydantic,
  python-dateutil,
  python-slugify,
  pyyaml,
  requests,
  rfc3986,
  simpleeval,
  tabulate,
  typer,
  typing-extensions,
  validators,

  # Optional formats
  boto3,
  google-api-python-client,
  datasette,
  duckdb,
  duckdb-engine,
  sqlalchemy,
  pygithub,
  pyquery,
  ijson,
  jsonlines,
  pymysql,
  ezodf,
  lxml,
  pandas,
  pyarrow,
  fastparquet,
  psycopg,
  psycopg2,
  visidata,
  tatsu,

  # Tests
  pytestCheckHook,
  pytest-cov,
  pytest-dotenv,
  pytest-lazy-fixtures,
  pytest-mock,
  pytest-timeout,
  pytest-vcr,
  moto,
  requests-mock,

  # Tests depending on excel
  openpyxl,
  xlrd,
}:

buildPythonPackage rec {
  pname = "frictionless";
  version = "5.18.1";
  pyproject = true;

  # Sources come from the `v<version>` tag of the upstream repository.
  src = fetchFromGitHub {
    owner = "frictionlessdata";
    repo = "frictionless-py";
    tag = "v${version}";
    hash = "sha256-svspEHcEw994pEjnuzWf0FFaYeFZuqriK96yFAB6/gI=";
  };

  build-system = [
    hatchling
  ];

  # Unconditional runtime dependencies.
  dependencies = [
    attrs
    chardet
    humanize
    isodate
    jinja2
    jsonschema
    marko
    petl
    pydantic
    python-dateutil
    python-slugify
    pyyaml
    requests
    rfc3986
    simpleeval
    tabulate
    typer
    typing-extensions
    validators
  ];

  # One attribute per upstream extra. The attribute names shadow some of the
  # function arguments (e.g. `datasette`, `duckdb`, `pandas`), which is fine
  # because this inner attribute set is not `rec`: names inside the lists still
  # resolve to the packages passed in as arguments.
  optional-dependencies = {
    # The commented-out formats require dependencies that have not been packaged
    # They are intentionally left in for reference - Please do not remove them
    aws = [
      boto3
    ];
    bigquery = [
      google-api-python-client
    ];
    #ckan = [
    #  frictionless-ckan-mapper # not packaged
    #];
    datasette = [
      datasette
    ];
    duckdb = [
      duckdb
      duckdb-engine
      sqlalchemy
    ];
    #excel = [
    #  openpyxl
    #  tableschema-to-template # not packaged
    #  xlrd
    #  xlwt
    #];
    github = [
      pygithub
    ];
    #gsheets = [
    #  pygsheets # not packaged
    #];
    html = [
      pyquery
    ];
    json = [
      ijson
      jsonlines
    ];
    mysql = [
      pymysql
      sqlalchemy
    ];
    ods = [
      ezodf
      lxml
    ];
    pandas = [
      pandas
      pyarrow
    ];
    parquet = [
      fastparquet
    ];
    postgresql = [
      psycopg
      psycopg2
      sqlalchemy
    ];
    #spss = [
    #  savreaderwriter # not packaged
    #];
    sql = [
      sqlalchemy
    ];
    visidata = [
      # Not ideal: This is actually outside pythonPackages set and depends on whatever
      # Python version the top-level python3Packages set refers to
      visidata
    ];
    wkt = [
      tatsu
    ];
    #zenodo = [
    #  pyzenodo3 # not packaged
    #];
  };

  # The test suite runs with every packaged extra available: all lists in
  # `optional-dependencies` are flattened and appended to the check inputs.
  nativeCheckInputs = [
    pytestCheckHook
    pytest-cov
    pytest-dotenv
    pytest-lazy-fixtures
    pytest-mock
    pytest-timeout
    pytest-vcr
    moto
    requests-mock

    # We do not have all packages for the `excel` format to fully function,
    # but it's required for some of the tests.
    openpyxl
    xlrd
  ] ++ lib.flatten (lib.attrValues optional-dependencies);

  disabledTestPaths = [
    # Requires optional dependencies that have not been packaged (commented out above)
    # The tests of other unavailable formats are auto-skipped
    "frictionless/formats/excel"
    "frictionless/formats/spss"
  ];

  # Smoke test: the top-level module must be importable after the build.
  pythonImportsCheck = [
    "frictionless"
  ];

  meta = {
    description = "Data management framework for Python that provides functionality to describe, extract, validate, and transform tabular data";
    homepage = "https://github.com/frictionlessdata/frictionless-py";
    # `src.tag` is the plain tag name; `src.rev` would expand to
    # `refs/tags/v<version>` because the source is fetched by tag.
    changelog = "https://github.com/frictionlessdata/frictionless-py/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ zhaofengli ];
  };
}
+40 −0
Original line number Diff line number Diff line
# Package expression for Py3AMF, Action Message Format (AMF) support for
# Python 3. The distribution is named `py3amf` but installs the `pyamf` module.
{
  lib,
  buildPythonPackage,
  setuptools,
  fetchFromGitHub,
  defusedxml,
}:

buildPythonPackage rec {
  pname = "py3amf";
  version = "0.8.11";
  pyproject = true;

  # Sources come from the `v<version>` tag of the upstream repository.
  src = fetchFromGitHub {
    owner = "StdCarrot";
    repo = "Py3AMF";
    tag = "v${version}";
    hash = "sha256-9zuHh5+ggIjv1LcjpBNHy2yh09KsFpxUdGrtKGm94Zg=";
  };

  build-system = [
    setuptools
  ];

  dependencies = [
    defusedxml
  ];

  # No test hook is configured in this expression; the import check below is
  # the only verification of the built package declared here.
  pythonImportsCheck = [
    "pyamf"
  ];

  meta = {
    description = "Action Message Format (AMF) support for Python 3";
    homepage = "https://github.com/StdCarrot/Py3AMF";
    # `src.tag` is the plain tag name; `src.rev` would expand to
    # `refs/tags/v<version>` because the source is fetched by tag.
    changelog = "https://github.com/StdCarrot/Py3AMF/blob/${src.tag}/CHANGES.txt";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ zhaofengli ];
  };
}
+82 −0
Original line number Diff line number Diff line
# Package expression for py-wacz, a utility for working with web archive data
# using the WACZ format specification. Provides the `wacz` executable.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,
  setuptools,
  boilerpy3,
  cdxj-indexer,
  frictionless,
  pytest-cov,
  pyyaml,
  shortuuid,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "wacz";
  version = "0.5.0";
  pyproject = true;

  # Sources come from the `v<version>` tag of the upstream repository.
  src = fetchFromGitHub {
    owner = "webrecorder";
    repo = "py-wacz";
    tag = "v${version}";
    hash = "sha256-bGY6G7qBAN1Vu+pTNqRG0xh34sR62pMhQFHFGlJaTPQ=";
  };

  patches = [
    # Dependency clean-up that is not part of the packaged release, fetched as
    # a compare-range patch between two fixed commits.
    # <https://github.com/webrecorder/py-wacz/pull/47>
    (fetchpatch {
      name = "clean-up-deps.patch";
      url = "https://github.com/webrecorder/py-wacz/compare/1e8f724a527f28855eedeb0d969ee39b00b2a80a...9d3ad60f125247b8a4354511d9123b85ce6a23c5.patch";
      hash = "sha256-zH6BKhsq9ybjzaWcNbVkk1sWh8vVCkv7Qxuwl0MQhNM=";
    })
  ];

  # Blank out the `pytest-runner` string in setup.py.
  # `--replace-fail` is used instead of the deprecated `--replace` so the build
  # fails loudly if the string ever disappears upstream, rather than leaving
  # this substitution behind as silent dead code.
  postPatch = ''
    substituteInPlace setup.py \
      --replace-fail "pytest-runner" ""
  '';

  build-system = [
    setuptools
  ];

  # Besides the direct dependencies, pull in the packages behind
  # frictionless's `json` extra.
  dependencies = [
    boilerpy3
    cdxj-indexer
    frictionless
    pyyaml
    shortuuid
  ] ++ frictionless.optional-dependencies.json;

  # Kept (empty) as a placeholder for the upstream extra that cannot be
  # provided yet.
  optional-dependencies = {
    # signing = [
    #   authsign # not packaged
    # ];
  };

  nativeCheckInputs = [
    pytestCheckHook
    pytest-cov
  ];

  disabledTests = [
    # authsign is not packaged
    "test_verify_signed"
  ];

  # Smoke test: the top-level module must be importable after the build.
  pythonImportsCheck = [
    "wacz"
  ];

  meta = {
    description = "Utility for working with web archive data using the WACZ format specification";
    homepage = "https://github.com/webrecorder/py-wacz";
    # `src.tag` is the plain tag name; `src.rev` would expand to
    # `refs/tags/v<version>` because the source is fetched by tag.
    changelog = "https://github.com/webrecorder/py-wacz/blob/${src.tag}/CHANGES.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ zhaofengli ];
    mainProgram = "wacz";
  };
}
Loading