Unverified Commit e76dc010 authored by Yt's avatar Yt Committed by GitHub
Browse files

docling: init at 2.17.0 (#359783)

parents ceaea203 b0118538
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
# Top-level `docling` application: re-exports the Python module
# `python3Packages.docling` through `toPythonApplication`.
{ python3Packages }:

let
  inherit (python3Packages) docling toPythonApplication;
in
toPythonApplication docling
+19 −5
Original line number Diff line number Diff line
@@ -3,12 +3,18 @@
  buildPythonPackage,
  fetchFromGitHub,
  poetry-core,
  # dependencies
  jsonref,
  jsonschema,
  pandas,
  pillow,
  pydantic,
  tabulate,
  pyyaml,
  typing-extensions,
  transformers,
  typer,
  latex2mathml,
  jsondiff,
  requests,
  pytestCheckHook,
@@ -16,14 +22,14 @@

buildPythonPackage rec {
  pname = "docling-core";
  version = "2.3.2";
  version = "2.16.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "docling-core";
    tag = "v${version}";
    hash = "sha256-N8rL+5bCVF4Qi5eqgkaB2r3LTYoqTVPeK4gQ6stiW/w=";
    hash = "sha256-oW/jX9IHCpztc0FDm8/3OzDmOxM92jrkFq/JeAcI9ZA=";
  };

  build-system = [
@@ -31,12 +37,18 @@ buildPythonPackage rec {
  ];

  dependencies = [
    jsonref
    jsonschema
    pandas
    pillow
    pydantic
    jsonref
    tabulate
    pandas
    pillow
    pyyaml
    typing-extensions
    transformers
    # semchunk
    typer
    latex2mathml
  ];

  pythonRelaxDeps = [
@@ -47,6 +59,8 @@ buildPythonPackage rec {
    "docling_core"
  ];

  doCheck = false;

  nativeCheckInputs = [
    jsondiff
    pytestCheckHook
+22 −12
Original line number Diff line number Diff line
@@ -3,28 +3,30 @@
  buildPythonPackage,
  fetchFromGitHub,
  poetry-core,
  # dependencies
  torch,
  torchvision,
  transformers,
  huggingface-hub,
  jsonlines,
  mean-average-precision,
  numpy,
  opencv-python-headless,
  pillow,
  torch,
  torchvision,
  tqdm,
  safetensors,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "docling-ibm-models";
  version = "2.0.4";
  version = "3.3.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "docling-ibm-models";
    tag = "v${version}";
    hash = "sha256-QZvkkazxgkGuSQKIYI+YghH7pLlDSEbCGhg89gZsOpk=";
    hash = "sha256-wxkHd+TCBibOTWO09JOsjX6oBtUxZ/9IOmyLdeptzeQ=";
  };

  build-system = [
@@ -32,21 +34,23 @@ buildPythonPackage rec {
  ];

  dependencies = [
    huggingface-hub
    jsonlines
    mean-average-precision
    numpy
    opencv-python-headless
    pillow
    torch
    torchvision
    transformers
    numpy
    jsonlines
    pillow
    tqdm
    opencv-python-headless
    huggingface-hub
    safetensors
  ];

  pythonRelaxDeps = [
    "mean_average_precision"
    "pillow"
    "torchvision"
    "transformers"
    "numpy"
  ];

  pythonImportsCheck = [
@@ -57,10 +61,16 @@ buildPythonPackage rec {
    pytestCheckHook
  ];

  preCheck = ''
    export HOME="$TEMPDIR"
  '';

  disabledTests = [
    # Requires network access
    "test_layoutpredictor"
    "test_tf_predictor"
    "test_code_formula_predictor" # huggingface_hub.errors.LocalEntryNotFoundError
    "test_figure_classifier" # huggingface_hub.errors.LocalEntryNotFoundError
  ];

  meta = {
+15 −3
Original line number Diff line number Diff line
@@ -7,26 +7,30 @@
  cxxopts,
  poetry-core,
  pybind11,
  tabulate,
  zlib,
  nlohmann_json,
  utf8cpp,
  libjpeg,
  qpdf,
  loguru-cpp,
  # python dependencies
  tabulate,
  pillow,
  pydantic,
  docling-core,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "docling-parse";
  version = "2.0.3";
  version = "3.1.2";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "docling-parse";
    tag = "v${version}";
    hash = "sha256-pZJ7lneg4ftAoWS5AOflkkKCwZGF4TJIuqDjq4W4VBw=";
    hash = "sha256-SgVLk1kruUSjtzuo/5YFY4Keha8zMzovm/UeCtfGaNY=";
  };

  dontUseCmakeConfigure = true;
@@ -61,6 +65,14 @@ buildPythonPackage rec {

  dependencies = [
    tabulate
    pillow
    pydantic
    docling-core
  ];

  pythonRelaxDeps = [
    "pydantic"
    "pillow"
  ];

  pythonImportsCheck = [
+150 −0
Original line number Diff line number Diff line
# Python package expression for docling.
#
# Built from the upstream GitHub tag `v${version}` with the poetry-core
# build backend. All function arguments are supplied by the Python package
# set that calls this file.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  # dependencies
  pydantic,
  docling-core,
  docling-ibm-models,
  deepsearch-glm,
  docling-parse,
  filetype,
  pypdfium2,
  pydantic-settings,
  huggingface-hub,
  requests,
  easyocr,
  tesserocr,
  certifi,
  rtree,
  scipy,
  typer,
  python-docx,
  python-pptx,
  beautifulsoup4,
  pandas,
  marko,
  openpyxl,
  lxml,
  # ocrmac # not yet packaged
  rapidocr-onnxruntime,
  onnxruntime,
  pillow,
  pyarrow,
  # build system
  poetry-core,
  # optional dependencies
  mkdocs-material,
  mkdocs-jupyter,
  # mkdocs-click # not yet packaged
  mkdocstrings,
  # native check inputs
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "docling";
  version = "2.17.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "docling";
    tag = "v${version}";
    hash = "sha256-OtUFQRNqyTGT1Z41tHziwM5hqbk+tg/97bxhtPVtmN0=";
  };

  build-system = [
    poetry-core
  ];

  # Note: tesserocr, rapidocr-onnxruntime and onnxruntime are listed here and
  # again under optional-dependencies below, so they are always installed.
  dependencies = [
    pydantic
    docling-core
    docling-ibm-models
    deepsearch-glm
    docling-parse
    filetype
    pypdfium2
    pydantic-settings
    huggingface-hub
    requests
    easyocr
    tesserocr
    certifi
    rtree
    scipy
    typer
    python-docx
    python-pptx
    beautifulsoup4
    pandas
    marko
    openpyxl
    lxml
    # ocrmac # not yet packaged
    rapidocr-onnxruntime
    onnxruntime
    pillow
    pyarrow
  ];

  # Relax the version constraint declared for pillow in the package metadata.
  pythonRelaxDeps = [
    "pillow"
  ];

  optional-dependencies = {
    # Kept as an (empty) extra so the attribute exists until ocrmac is packaged.
    ocrmac = [
      # ocrmac # not yet packaged
    ];
    rapidocr = [
      onnxruntime
      rapidocr-onnxruntime
    ];
    tesserocr = [
      tesserocr
    ];

    docs = [
      mkdocs-material
      mkdocs-jupyter
      # mkdocs-click # not yet packaged
      mkdocstrings
      # griffe-pydantic
    ];
  };

  # Point HOME at the build's temporary directory for the test run.
  # NOTE(review): presumably needed because some dependency writes cache or
  # config files under $HOME during tests; confirm before removing.
  preCheck = ''
    export HOME="$TEMPDIR"
  '';

  nativeCheckInputs = [
    pytestCheckHook
  ];

  pythonImportsCheck = [
    "docling"
  ];

  disabledTests = [
    "test_e2e_pdfs_conversions" # AssertionError: ## TableFormer: Table Structure Understanding with Transf
    "test_e2e_conversions" # RuntimeError: Tesseract is not available
    # huggingface_hub.errors.LocalEntryNotFoundError: An error happened
    "test_cli_convert"
    "test_code_and_formula_conversion"
    "test_picture_classifier"
    "test_convert_path"
    "test_convert_stream"
    "test_compare_legacy_output"
    "test_ocr_coverage_threshold"
  ];

  meta = {
    description = "Get your documents ready for gen AI";
    homepage = "https://github.com/DS4SD/docling";
    # Built from the same tag that `src` fetches rather than from `src.rev`,
    # so the link always contains the plain tag name.
    changelog = "https://github.com/DS4SD/docling/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ happysalada ];
    mainProgram = "docling";
  };
}
Loading