Unverified Commit 025d3a2f authored by Leona Maroni's avatar Leona Maroni Committed by GitHub
Browse files

Merge pull request #303388 from SuperSandro2000/paperless-only-enabled-languages

nixos/paperless: override enabled tesseract languages with the ones configured in paperless
parents b2307803 b579dac4
Loading
Loading
Loading
Loading
+24 −15
Original line number Diff line number Diff line
@@ -3,7 +3,6 @@
with lib;
let
  cfg = config.services.paperless;
  pkg = cfg.package;

  defaultUser = "paperless";
  defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf";
@@ -25,7 +24,7 @@ let
  } // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) {
    PAPERLESS_NLTK_DIR = pkgs.symlinkJoin {
      name = "paperless_ngx_nltk_data";
      paths = pkg.nltkData;
      paths = cfg.package.nltkData;
    };
  } // optionalAttrs (cfg.openMPThreadingWorkaround) {
    OMP_NUM_THREADS = "1";
@@ -38,7 +37,7 @@ let
  manage = pkgs.writeShellScript "manage" ''
    set -o allexport # Export the following env vars
    ${lib.toShellVars env}
    exec ${pkg}/bin/paperless-ngx "$@"
    exec ${cfg.package}/bin/paperless-ngx "$@"
  '';

  # Secure the services
@@ -200,7 +199,17 @@ in
      description = "User under which Paperless runs.";
    };

    package = mkPackageOption pkgs "paperless-ngx" { };
    # The paperless-ngx package to use. Whatever package is selected is passed
    # through `apply`, which re-overrides its tesseract5 so that only the OCR
    # language data that is actually needed ends up in the closure.
    package = mkPackageOption pkgs "paperless-ngx" { } // {
      apply = pkg: pkg.override {
        tesseract5 = pkg.tesseract5.override {
          # PAPERLESS_OCR_LANGUAGE is a "+"-separated list such as "deu+eng".
          # Always enable the detection modules (equ, osd) and eng, because
          # tesseract fails to build when eng is not present. De-duplicate so
          # that a language the user lists explicitly is not passed twice.
          # When the setting is absent, pass null, i.e. do not restrict the
          # language set here.
          enableLanguages = if cfg.settings ? PAPERLESS_OCR_LANGUAGE then
            lib.unique (
              [ "equ" "osd" "eng" ]
                ++ lib.splitString "+" cfg.settings.PAPERLESS_OCR_LANGUAGE
            )
          else null;
        };
      };
    };

    openMPThreadingWorkaround = mkEnableOption ''
      a workaround for document classifier timeouts.
@@ -237,7 +246,7 @@ in
      wants = [ "paperless-consumer.service" "paperless-web.service" "paperless-task-queue.service" ];
      serviceConfig = defaultServiceConfig // {
        User = cfg.user;
        ExecStart = "${pkg}/bin/celery --app paperless beat --loglevel INFO";
        ExecStart = "${cfg.package}/bin/celery --app paperless beat --loglevel INFO";
        Restart = "on-failure";
        LoadCredential = lib.optionalString (cfg.passwordFile != null) "PAPERLESS_ADMIN_PASSWORD:${cfg.passwordFile}";
      };
@@ -250,8 +259,8 @@ in
        versionFile="${cfg.dataDir}/src-version"
        version=$(cat "$versionFile" 2>/dev/null || echo 0)

        if [[ $version != ${pkg.version} ]]; then
          ${pkg}/bin/paperless-ngx migrate
        if [[ $version != ${cfg.package.version} ]]; then
          ${cfg.package}/bin/paperless-ngx migrate

          # Parse old version string format for backwards compatibility
          version=$(echo "$version" | grep -ohP '[^-]+$')
@@ -264,10 +273,10 @@ in
          if versionLessThan 1.12.0; then
            # Reindex documents as mentioned in https://github.com/paperless-ngx/paperless-ngx/releases/tag/v1.12.1
            echo "Reindexing documents, to allow searching old comments. Required after the 1.12.x upgrade."
            ${pkg}/bin/paperless-ngx document_index reindex
            ${cfg.package}/bin/paperless-ngx document_index reindex
          fi

          echo ${pkg.version} > "$versionFile"
          echo ${cfg.package.version} > "$versionFile"
        fi
      ''
      + optionalString (cfg.passwordFile != null) ''
@@ -277,7 +286,7 @@ in
        superuserStateFile="${cfg.dataDir}/superuser-state"

        if [[ $(cat "$superuserStateFile" 2>/dev/null) != $superuserState ]]; then
          ${pkg}/bin/paperless-ngx manage_superuser
          ${cfg.package}/bin/paperless-ngx manage_superuser
          echo "$superuserState" > "$superuserStateFile"
        fi
      '';
@@ -290,7 +299,7 @@ in
      after = [ "paperless-scheduler.service" ];
      serviceConfig = defaultServiceConfig // {
        User = cfg.user;
        ExecStart = "${pkg}/bin/celery --app paperless worker --loglevel INFO";
        ExecStart = "${cfg.package}/bin/celery --app paperless worker --loglevel INFO";
        Restart = "on-failure";
        # The `mbind` syscall is needed for running the classifier.
        SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ];
@@ -308,7 +317,7 @@ in
      after = [ "paperless-scheduler.service" ];
      serviceConfig = defaultServiceConfig // {
        User = cfg.user;
        ExecStart = "${pkg}/bin/paperless-ngx document_consumer";
        ExecStart = "${cfg.package}/bin/paperless-ngx document_consumer";
        Restart = "on-failure";
      };
      environment = env;
@@ -340,8 +349,8 @@ in
          echo "PAPERLESS_SECRET_KEY is empty, refusing to start."
          exit 1
        fi
        exec ${pkg.python.pkgs.gunicorn}/bin/gunicorn \
          -c ${pkg}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application
        exec ${cfg.package.python.pkgs.gunicorn}/bin/gunicorn \
          -c ${cfg.package}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application
      '';
      serviceConfig = defaultServiceConfig // {
        User = cfg.user;
@@ -357,7 +366,7 @@ in
        CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ];
      };
      environment = env // {
        PYTHONPATH = "${pkg.python.pkgs.makePythonPath pkg.propagatedBuildInputs}:${pkg}/lib/paperless-ngx/src";
        PYTHONPATH = "${cfg.package.python.pkgs.makePythonPath cfg.package.propagatedBuildInputs}:${cfg.package}/lib/paperless-ngx/src";
      };
      # Allow the web interface to access the private /tmp directory of the server.
      # This is required to support uploading files via the web interface.
+6 −3
Original line number Diff line number Diff line
@@ -37,8 +37,11 @@ let
  # https://github.com/NixOS/nixpkgs/issues/298719
  # https://github.com/paperless-ngx/paperless-ngx/issues/5494
  python = python3.override {
    packageOverrides = self: super: {
      uvicorn = super.uvicorn.overridePythonAttrs (oldAttrs: {
    packageOverrides = final: prev: {
      # tesseract5 may be overridden by the paperless NixOS module; propagate that
      # override to ocrmypdf as well, otherwise the closure reduction is not effective
      ocrmypdf = prev.ocrmypdf.override { tesseract = tesseract5; };

      uvicorn = prev.uvicorn.overridePythonAttrs (_: {
        version = "0.25.0";
        src = fetchFromGitHub {
          owner = "encode";
@@ -245,7 +248,7 @@ python.pkgs.buildPythonApplication rec {
  doCheck = !stdenv.isDarwin;

  passthru = {
    inherit python path frontend;
    inherit python path frontend tesseract5;
    nltkData = with nltk-data; [ punkt snowball_data stopwords ];
    tests = { inherit (nixosTests) paperless; };
  };