Unverified commit 9cd16e9b, authored by Austin Horstman and committed by GitHub
Browse files

k2pdfopt: pin tesseract version (#357698)

parents 92c2525f 8657f42d
Loading
Loading
Loading
Loading
+168 −114
Original line number Diff line number Diff line
{ lib
, stdenv
, runCommand
, fetchzip
, fetchurl
, fetchFromGitHub
, cmake
, jbig2dec
, libjpeg_turbo
, libpng
, makeWrapper
, pkg-config
, zlib
, enableGSL ? true, gsl
, enableGhostScript ? true, ghostscript
, enableMuPDF ? true, mupdf
, enableDJVU ? true, djvulibre
, enableGOCR ? false, gocr # Disabled by default due to crashes
, enableTesseract ? true, leptonica, tesseract
{
  lib,
  stdenv,
  runCommand,
  fetchzip,
  fetchurl,
  fetchFromGitHub,
  cmake,
  jbig2dec,
  libjpeg_turbo,
  libpng,
  makeWrapper,
  pkg-config,
  zlib,
  enableGSL ? true,
  gsl,
  enableGhostScript ? true,
  ghostscript,
  enableMuPDF ? true,
  mupdf,
  enableDJVU ? true,
  djvulibre,
  enableGOCR ? false,
  gocr, # Disabled by default due to crashes
  enableTesseract ? true,
  leptonica,
  tesseract5,
  opencl-headers,
}:

# k2pdfopt is a pain to package. It requires modified versions of mupdf,
@@ -45,7 +54,13 @@

let
  # Create a patch against src based on changes applied in patchCommands
  mkPatch = { name, src, patchCommands }: runCommand "${name}-k2pdfopt.patch" { inherit src; } ''
  mkPatch =
    {
      name,
      src,
      patchCommands,
    }:
    runCommand "${name}-k2pdfopt.patch" { inherit src; } ''
      source $stdenv/setup
      unpackPhase

@@ -66,7 +81,8 @@ let
    url = "http://www.willus.com/${pname}/src/${pname}_v${version}_src.zip";
    hash = "sha256-orQNDXQkkcCtlA8wndss6SiJk4+ImiFCG8XRLEg963k=";
  };
in stdenv.mkDerivation rec {
in
stdenv.mkDerivation rec {
  inherit pname version;
  src = k2pdfopt_src;

@@ -79,7 +95,11 @@ in stdenv.mkDerivation rec {
      --replace "<djvu.h>" "<libdjvu/ddjvuapi.h>"
  '';

  nativeBuildInputs = [ cmake pkg-config makeWrapper ];
  nativeBuildInputs = [
    cmake
    pkg-config
    makeWrapper
  ];

  buildInputs =
    let
@@ -98,13 +118,19 @@ in stdenv.mkDerivation rec {
          cp ${k2pdfopt_src}/mupdf_mod/pdf-* ./source/pdf/
        '';
      };
    mupdf_modded = mupdf.overrideAttrs ({ patches ? [], ... }: {
      mupdf_modded = mupdf.overrideAttrs (
        {
          patches ? [ ],
          ...
        }:
        {
          patches = patches ++ [ mupdf_patch ];
          # This function is missing in font.c, see font-win32.c
          postPatch = ''
            echo "void pdf_install_load_system_font_funcs(fz_context *ctx) {}" >> source/fitz/font.c
          '';
    });
        }
      );

      leptonica_patch = mkPatch {
        name = "leptonica";
@@ -114,9 +140,15 @@ in stdenv.mkDerivation rec {
        };
        patchCommands = "cp -r ${k2pdfopt_src}/leptonica_mod/. ./src/";
      };
    leptonica_modded = leptonica.overrideAttrs ({ patches ? [], ... }: {
      leptonica_modded = leptonica.overrideAttrs (
        {
          patches ? [ ],
          ...
        }:
        {
          patches = patches ++ [ leptonica_patch ];
    });
        }
      );

      tesseract_patch = mkPatch {
        name = "tesseract";
@@ -140,8 +172,19 @@ in stdenv.mkDerivation rec {
          cp ${k2pdfopt_src}/tesseract_mod/openclwrapper.* src/opencl/
        '';
      };
    tesseract_modded = tesseract.override {
      tesseractBase = tesseract.tesseractBase.overrideAttrs ({ patches ? [], ... }: {
      tesseract_modded = tesseract5.override {
        tesseractBase = tesseract5.tesseractBase.overrideAttrs (
          {
            patches ? [ ],
            buildInputs ? [ ],
            ...
          }:
          {
            pname = "tesseract-k2pdfopt";
            version = tesseract_patch.src.rev;
            src = tesseract_patch.src;
            # opencl-headers were removed from tesseract in Version 5.4
            buildInputs = buildInputs ++ [ opencl-headers ];
            patches = patches ++ [ tesseract_patch ];
            # Additional compilation fixes
            postPatch = ''
@@ -151,16 +194,25 @@ in stdenv.mkDerivation rec {
              substituteInPlace include/tesseract/tesseract.h \
                --replace "#include <leptonica.h>" "//#include <leptonica.h>"
            '';
      });
          }
        );
      };
    in
    [ jbig2dec libjpeg_turbo libpng zlib ] ++
    lib.optional enableGSL gsl ++
    lib.optional enableGhostScript ghostscript ++
    lib.optional enableMuPDF mupdf_modded ++
    lib.optional enableDJVU djvulibre ++
    lib.optional enableGOCR gocr ++
    lib.optionals enableTesseract [ leptonica_modded tesseract_modded ];
    [
      jbig2dec
      libjpeg_turbo
      libpng
      zlib
    ]
    ++ lib.optional enableGSL gsl
    ++ lib.optional enableGhostScript ghostscript
    ++ lib.optional enableMuPDF mupdf_modded
    ++ lib.optional enableDJVU djvulibre
    ++ lib.optional enableGOCR gocr
    ++ lib.optionals enableTesseract [
      leptonica_modded
      tesseract_modded
    ];

  dontUseCmakeBuildDir = true;

@@ -173,7 +225,7 @@ in stdenv.mkDerivation rec {
  '';

  preFixup = lib.optionalString enableTesseract ''
    wrapProgram $out/bin/k2pdfopt --set-default TESSDATA_PREFIX ${tesseract}/share/tessdata
    wrapProgram $out/bin/k2pdfopt --set-default TESSDATA_PREFIX ${tesseract5}/share/tessdata
  '';

  meta = with lib; {
@@ -182,7 +234,9 @@ in stdenv.mkDerivation rec {
    changelog = "https://www.willus.com/k2pdfopt/k2pdfopt_version.txt";
    license = licenses.gpl3;
    platforms = platforms.linux;
    maintainers = with maintainers; [ bosu danielfullmer ];
    maintainers = with maintainers; [
      bosu
      danielfullmer
    ];
  };
}