Unverified Commit 6066ea41 authored by kirillrdy's avatar kirillrdy Committed by GitHub
Browse files

nixos/anubis: improve policy configuration (#477377)

parents a61f06cd df7f2ece
Loading
Loading
Loading
Loading
+49 −7
Original line number Diff line number Diff line
@@ -78,15 +78,57 @@ It is possible to configure default settings for all instances of Anubis, via {o
```nix
{
  services.anubis.defaultOptions = {
    botPolicy = {
      dnsbl = false;
    };
    settings.DIFFICULTY = 3;
  };
}
```

Note that at the moment, a custom bot policy is not merged with the baked-in one. That means to only override a setting
like `dnsbl`, copying the entire bot policy is required. Check
[the upstream repository](https://github.com/TecharoHQ/anubis/blob/1509b06cb921aff842e71fbb6636646be6ed5b46/cmd/anubis/botPolicies.json)
for the policy.
By default, this module uses Anubis's built-in policy (`botPolicies.yaml`), which includes sensible defaults for bot
rules, thresholds, status codes, and storage backend. A custom policy file is only generated when you explicitly
customize the policy via {option}`services.anubis.instances.<name>.policy`.

To add custom bot rules while keeping the defaults:

```nix
{
  services.anubis.instances.default = {
    settings.TARGET = "http://localhost:8000";
    policy.extraBots = [
      {
        name = "my-allowed-bot";
        user_agent_regex = "MyBot/.*";
        action = "ALLOW";
      }
    ];
  };
}
```

To opt out of the default bot rules entirely and define your own:

```nix
{
  services.anubis.instances.default = {
    settings.TARGET = "http://localhost:8000";
    policy = {
      useDefaultBotRules = false;
      extraBots = [
        {
          name = "my-rule";
          path_regex = ".*";
          action = "CHALLENGE";
        }
      ];
    };
  };
}
```

::: {.note}
When you customize the policy, a custom policy file is generated. This file imports the default bot rules via
`(data)/meta/default-config.yaml` when {option}`services.anubis.instances.<name>.policy.useDefaultBotRules` is enabled,
but it does not inherit the 5-tier thresholds from `botPolicies.yaml`; Anubis's simpler legacy threshold is used
instead. If you need custom thresholds, specify them under `thresholds` in
{option}`services.anubis.instances.<name>.policy.settings`.
:::

See [the upstream documentation](https://anubis.techaro.lol/docs/admin/policies) for all available policy options.
+94 −13
Original line number Diff line number Diff line
@@ -12,6 +12,32 @@ let
  enabledInstances = lib.filterAttrs (_: conf: conf.enable) cfg.instances;
  instanceName = name: if name == "" then "anubis" else "anubis-${name}";

  # Builds the policy file for a single Anubis instance.
  #
  # Yields null while the policy is untouched (default bot rules enabled, no
  # extraBots, empty settings), so that Anubis keeps using its built-in
  # botPolicies.yaml together with that file's thresholds, status_codes, store,
  # etc. Any customization results in a generated JSON policy file instead.
  mkPolicyFile =
    name: instance:
    let
      inherit (instance) policy;
      isUntouched = policy.useDefaultBotRules && policy.extraBots == [ ] && policy.settings == { };
      # Anubis's stock rules come first so user-supplied rules are appended after them.
      defaultRules = lib.optional policy.useDefaultBotRules {
        import = "(data)/meta/default-config.yaml";
      };
    in
    if isUntouched then
      null
    else
      jsonFormat.generate "${instanceName name}-policy.json" (
        # On a key conflict, entries from policy.settings replace the generated ones.
        { bots = defaultRules ++ policy.extraBots; } // policy.settings
      );

  unixAddr = network: addr: lib.strings.optionalString (network == "unix") addr;
  unixSocketAddrs =
    settings:
@@ -40,6 +66,10 @@ let
    in
    { name, ... }:
    {
      imports = [
        (lib.mkRenamedOptionModule [ "botPolicy" ] [ "policy" "settings" ])
      ];

      options = {
        enable = lib.mkEnableOption "this instance of Anubis" // {
          default = true;
@@ -65,18 +95,71 @@ let
          type = types.str;
        };

        botPolicy = mkDefaultOption "botPolicy" {
          default = null;
        policy = lib.mkOption {
          default = { };
          description = ''
            Anubis policy configuration in Nix syntax. Set to `null` to use the baked-in policy which should be
            sufficient for most use-cases.

            This option has no effect if `settings.POLICY_FNAME` is set to a different value, which is useful for
            importing an existing configuration.
            Anubis policy configuration.

            See [the documentation](https://anubis.techaro.lol/docs/admin/policies) for details.
          '';
          type = types.nullOr jsonFormat.type;
          type = types.submodule {
            options = {
              useDefaultBotRules = mkDefaultOption "policy.useDefaultBotRules" {
                type = types.bool;
                default = true;
                description = ''
                  Whether to include Anubis's default bot detection rules via the
                  `(data)/meta/default-config.yaml` import.

                  Set to `false` to define your own bot rules from scratch using
                  {option}`extraBots`.
                '';
              };

              extraBots = mkDefaultOption "policy.extraBots" {
                type = types.listOf jsonFormat.type;
                default = [ ];
                example = lib.literalExpression ''
                  [
                    {
                      name = "my-bot";
                      user_agent_regex = "MyBot/.*";
                      action = "ALLOW";
                    }
                  ]
                '';
                description = ''
                  Additional bot rules appended to the policy.

                  When {option}`useDefaultBotRules` is `true`, these rules are added after
                  Anubis's default rules. When `false`, only these rules are used.
                '';
              };

              settings = mkDefaultOption "policy.settings" {
                type = jsonFormat.type;
                default = { };
                example = lib.literalExpression ''
                  {
                    dnsbl = false;
                    store = {
                      backend = "bbolt";
                      parameters.path = "/var/lib/anubis/data.bdb";
                    };
                  }
                '';
                description = ''
                  Additional policy settings merged into the policy file.

                  Common settings include `dnsbl`, `store`, `logging`, `thresholds`,
                  `impressum`, `openGraph`, and `status_codes`.

                  See [the documentation](https://anubis.techaro.lol/docs/admin/policies) for
                  available options.
                '';
              };
            };
          };
        };

        extraFlags = mkDefaultOption "extraFlags" {
@@ -175,8 +258,8 @@ let
                POLICY_FNAME = mkDefaultOption "settings.POLICY_FNAME" {
                  default = null;
                  description = ''
                    The bot policy file to use. Leave this as `null` to respect the value set in
                    {option}`services.anubis.instances.<name>.botPolicy`.
                    The policy file to use. Leave this as `null` to use the policy generated from
                    {option}`services.anubis.instances.<name>.policy`.
                  '';
                  type = types.nullOr types.path;
                };
@@ -306,10 +389,8 @@ in
              POLICY_FNAME =
                if instance.settings.POLICY_FNAME != null then
                  instance.settings.POLICY_FNAME
                else if instance.botPolicy != null then
                  jsonFormat.generate "${instanceName name}-botPolicy.json" instance.botPolicy
                else
                  null;
                  mkPolicyFile name instance;
            }
          )
        );
+23 −40
Original line number Diff line number Diff line
{ lib, ... }:
let
  legacyBotPolicyJSON = ''
    {
      "bots": [
        {
          "import": "(data)/bots/_deny-pathological.yaml"
        },
        {
          "import": "(data)/meta/ai-block-aggressive.yaml"
        },
        {
          "import": "(data)/crawlers/_allow-good.yaml"
        },
        {
          "import": "(data)/bots/aggressive-brazilian-scrapers.yaml"
        },
        {
          "import": "(data)/common/keep-internet-working.yaml"
        },
        {
          "name": "generic-browser",
          "user_agent_regex": "Mozilla|Opera",
          "action": "CHALLENGE"
        }
      ],
      "dnsbl": false,
      "status_codes": {
        "CHALLENGE": 200,
        "DENY": 200
      }
    }'';
in
{
  name = "anubis";
  meta.maintainers = with lib.maintainers; [
@@ -54,7 +22,6 @@ in

      services.anubis = {
        defaultOptions = {
          botPolicy = builtins.fromJSON legacyBotPolicyJSON;
          settings = {
            DIFFICULTY = 3;
            USER_DEFINED_DEFAULT = true;
@@ -92,14 +59,29 @@ in
          };
        };

        instances."botPolicy-default" = {
          botPolicy = null;
        instances."policy-default" = {
          settings = {
            TARGET = "http://localhost:8080";
          };
        };

        instances."policy-custom" = {
          policy = {
            extraBots = [
              {
                name = "custom-allow";
                user_agent_regex = "CustomBot/.*";
                action = "ALLOW";
              }
            ];
            settings.dnsbl = false;
          };
          settings = {
            TARGET = "http://localhost:8080";
          };
        };

        instances."botPolicy-file" = {
        instances."policy-file" = {
          settings = {
            TARGET = "http://localhost:8080";
            POLICY_FNAME = "/etc/anubis-botPolicy.json";
@@ -191,9 +173,10 @@ in
    machine.succeed('cat /run/current-system/etc/systemd/system/anubis.service | grep "DIFFICULTY=5"')
    machine.succeed('cat /run/current-system/etc/systemd/system/anubis-tcp.service | grep "DIFFICULTY=3"')

    # Check correct BotPolicy settings are applied
    machine.succeed('cat /run/current-system/etc/systemd/system/anubis.service | grep "POLICY_FNAME=/nix/store"')
    machine.fail('cat /run/current-system/etc/systemd/system/anubis-botPolicy-default.service | grep "POLICY_FNAME="')
    machine.succeed('cat /run/current-system/etc/systemd/system/anubis-botPolicy-file.service | grep "POLICY_FNAME=/etc/anubis-botPolicy.json"')
    # Check correct policy settings are applied.
    machine.fail('cat /run/current-system/etc/systemd/system/anubis.service | grep "POLICY_FNAME="')
    machine.fail('cat /run/current-system/etc/systemd/system/anubis-policy-default.service | grep "POLICY_FNAME="')
    machine.succeed('cat /run/current-system/etc/systemd/system/anubis-policy-custom.service | grep "POLICY_FNAME=/nix/store"')
    machine.succeed('cat /run/current-system/etc/systemd/system/anubis-policy-file.service | grep "POLICY_FNAME=/etc/anubis-botPolicy.json"')
  '';
}