Unverified Commit 82b0ae66 authored by ryota2357's avatar ryota2357
Browse files

python3Packages.pymupdf4llm: init at 0.0.17

parent d7e271ce
Loading
Loading
Loading
Loading
+56 −0
Original line number Diff line number Diff line
# Package expression for pymupdf4llm, built from the `pymupdf4llm`
# subdirectory of the pymupdf/RAG GitHub repository.
#
# Arguments (all supplied by the Python package set via `callPackage`):
#   lib                - nixpkgs library (licenses, maintainers)
#   buildPythonPackage - Python package builder
#   fetchFromGitHub    - source fetcher
#   setuptools         - build backend used by the project
#   pymupdf            - the only runtime dependency declared here
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools,
  pymupdf,
}:

buildPythonPackage rec {
  pname = "pymupdf4llm";
  version = "0.0.17";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "pymupdf";
    repo = "RAG";
    tag = "v${version}";
    hash = "sha256-+RLK+UorkU8eVQJGrc7pVNZPtIpxMgA9mBKA6GeWUa0=";
  };

  # The Python project is not at the repository root; build from its subdirectory.
  sourceRoot = "${src.name}/pymupdf4llm";

  build-system = [ setuptools ];

  dependencies = [ pymupdf ];

  # Hand-written smoke test: create a one-page PDF with PyMuPDF, convert it to
  # Markdown with pymupdf4llm and check that the inserted text survives.
  # NOTE(review): presumably used because no upstream test suite is wired up
  # for this subdirectory -- confirm against the upstream repository.
  #
  # PyMuPDF is imported under its current module name `pymupdf`; `fitz` is only
  # a legacy alias kept for backward compatibility.
  checkPhase = ''
    runHook preCheck

    python3 - <<'EOF'
    import pymupdf
    import pymupdf4llm

    doc = pymupdf.open()
    page = doc.new_page()
    page.insert_text((72, 72), "Hello, Nix!")
    doc.save("input.pdf")

    md = pymupdf4llm.to_markdown("input.pdf")
    assert isinstance(md, str), "Returned value is not a string"
    assert "Hello, Nix!" in md, "Returned value does not contain the expected text"
    EOF

    runHook postCheck
  '';

  pythonImportsCheck = [ "pymupdf4llm" ];

  meta = {
    description = "PyMuPDF Utilities for LLM/RAG - converts PDF pages to Markdown format for Retrieval-Augmented Generation";
    homepage = "https://github.com/pymupdf/RAG";
    # Link the changelog at the exact tag that was fetched above.
    changelog = "https://github.com/pymupdf/RAG/blob/${src.tag}/CHANGES.md";
    license = lib.licenses.agpl3Only;
    maintainers = with lib.maintainers; [ ryota2357 ];
  };
}
+2 −0
Original line number Diff line number Diff line
@@ -12870,6 +12870,8 @@ self: super: with self; {
  pymupdf-fonts = callPackage ../development/python-modules/pymupdf-fonts { };
  pymupdf4llm = callPackage ../development/python-modules/pymupdf4llm { };
  pymvglive = callPackage ../development/python-modules/pymvglive { };
  pymysensors = callPackage ../development/python-modules/pymysensors { };