Unverified Commit 6e87867e authored by Maximilian Bosch's avatar Maximilian Bosch
Browse files

nixos/postgresql: allow customisations of SystemCallFilter

Closes #385603

The problem described is that `wal-g` requires syscalls from `@resources`.
However, the module has no support for `wal-g` right now, and I don't
think it's reasonable to only support hardening adjustments for things
that are supported by this module. Also, a list is a bad datatype here
since it doesn't allow the level of customization we need.

This is only for the syscall filterset since it's the option that's hard
to customize otherwise. For downstream configs, it's recommended to
adjust the hardening as needed in other cases.

Hence I decided to implement `services.postgresql.systemCallFilter` with
the following semantics:

* `systemCallFilter."~@resources" = true` adds `~@resources` to the
  filterset.

* Setting this to `false` (e.g. in a downstream configuration using
  `wal-g`) removes the entry `~@resources` from the filterset. In this
  case it's sufficient since `@system-service` implies `@resources` and
  the `~@resources` declaration after that discards that.

  I decided to not implement logic about negations in here, but to keep
  it rather simple by only allowing to set/unset entries.

As described in `systemd.exec(5)`, the ordering matters: e.g.
`@system-service` implies `@resources`, but `~@resources` _after_ that
reverts that. By default, the ordering of the keys is as follows:

* syscall groups (starting with `@`) come first.
* negations of syscall groups (starting with `~@`) come after that.
* anything else at the end.

If further ordering is needed, it can be done like this:

```
{
  services.postgresql.systemCallFilter."~@resources" = {
    enable = true; # whether or not it's part of the final SystemCallFilter
    priority = 23; # ordering priority in the filterset.
  };
}
```

The lower the priority, the higher up the entry will be in the final
filterset.
parent 53370ca1
Loading
Loading
Loading
Loading
+120 −10
Original line number Diff line number Diff line
@@ -14,8 +14,11 @@ let
    const
    elem
    escapeShellArgs
    filter
    filterAttrs
    getAttr
    getName
    hasPrefix
    isString
    literalExpression
    mapAttrs
@@ -31,6 +34,8 @@ let
    mkRemovedOptionModule
    mkRenamedOptionModule
    optionalString
    pipe
    sortProperties
    types
    versionAtLeast
    warn
@@ -124,6 +129,100 @@ in
        '';
      };

      # Entries of the SystemCallFilter of `postgresql.service`. Each attribute
      # name is one filter declaration; its value says whether the declaration
      # is emitted (`enable`) and where it is placed in the list (`priority`).
      systemCallFilter = mkOption {
        type =
          let
            # Default position of an entry, chosen from the prefix of its name:
            # "@…" entries first, "~@…" entries next, everything else last.
            defaultPriorityFor =
              entry:
              if hasPrefix "@" entry then
                500
              else if hasPrefix "~@" entry then
                1000
              else
                1500;

            # A bare boolean is accepted as shorthand for `{ enable = <bool>; }`.
            boolToEntry = enable: { inherit enable; };

            # Submodule describing a single entry; `name` is the attribute name
            # the entry is declared under.
            filterEntry =
              { name, ... }:
              {
                options.enable = mkEnableOption "${name} in postgresql's syscall filter";
                options.priority = mkOption {
                  type = types.int;
                  default = defaultPriorityFor name;
                  defaultText = literalExpression ''
                    if hasPrefix "@" name then 500 else if hasPrefix "~@" name then 1000 else 1500
                  '';
                  description = ''
                    Set the priority of the system call filter setting. Later declarations
                    override earlier ones, e.g.

                    ```ini
                    [Service]
                    SystemCallFilter=~read write
                    SystemCallFilter=write
                    ```

                    results in a service where _only_ `read` is not allowed.

                    The ordering in the unit file is controlled by this option: the higher
                    the number, the later it will be added to the filterset.

                    By default, depending on the prefix a priority is assigned: usually, call-groups
                    (starting with `@`) are used to allow/deny a larger set of syscalls and later
                    on single syscalls are configured for exceptions. Hence, syscall groups
                    and negative groups are placed before individual syscalls by default.
                  '';
                };
              };
          in
          types.attrsOf (types.coercedTo types.bool boolToEntry (types.submodule filterEntry));
        defaultText = literalExpression ''
          {
            "@system-service" = true;
            "~@privileged" = true;
            "~@resources" = true;
          }
        '';
        description = ''
          Configures the syscall filter for `postgresql.service`. The keys are
          declarations for `SystemCallFilter` as described in {manpage}`systemd.exec(5)`.

          The value is a boolean: `true` adds the attribute name to the syscall filter-set,
          `false` doesn't. This is done to allow downstream configurations to turn off
          restrictions made here. E.g. with

          ```nix
          {
            services.postgresql.systemCallFilter."~@resources" = false;
          }
          ```

          it's possible to remove the restriction on `@resources` (keep in mind that
          `@system-service` implies `@resources`).

          As described in the section for [](#opt-services.postgresql.systemCallFilter._name_.priority),
          the ordering matters. Hence, it's also possible to specify customizations with

          ```nix
          {
            services.postgresql.systemCallFilter = {
              "foobar" = { enable = true; priority = 23; };
            };
          }
          ```

          [](#opt-services.postgresql.systemCallFilter._name_.enable) is the flag whether
          or not it will be added to the `SystemCallFilter` of `postgresql.service`.

          Settings with a higher priority are added after filter settings with a lower
          priority. Hence, syscall groups with a higher priority can discard declarations
          with a lower priority.

          By default, syscall groups (i.e. attribute names starting with `@`) are added
          _before_ negated groups (i.e. `~@` as prefix) _before_ syscall names
          and negations.
        '';
      };

      checkConfig = mkOption {
        type = types.bool;
        default = true;
@@ -583,6 +682,21 @@ in
      '')
    ];

    # Module-provided contents of the syscall filter.
    services.postgresql.systemCallFilter = mkMerge [
      # Baseline entries. Each is wrapped in mkDefault so that a plain
      # definition elsewhere (e.g. `"~@resources" = false;`) takes precedence.
      {
        "@system-service" = mkDefault true;
        "~@privileged" = mkDefault true;
        "~@resources" = mkDefault true;
      }
      # Extra entry added only when the plv8 extension is installed.
      (mkIf (any extensionInstalled [ "plv8" ]) {
        "@pkey" = true;
      })
      # Extra entries added only when the citus extension is installed.
      (mkIf (any extensionInstalled [ "citus" ]) {
        "getpriority" = true;
        "setpriority" = true;
      })
    ];

    users.users.postgres = {
      name = "postgres";
      uid = config.ids.uids.postgres;
@@ -727,15 +841,11 @@ in
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          SystemCallArchitectures = "native";
          SystemCallFilter =
            [
              "@system-service"
              "~@privileged @resources"
            ]
            ++ lib.optionals (any extensionInstalled [ "plv8" ]) [ "@pkey" ]
            ++ lib.optionals (any extensionInstalled [ "citus" ]) [
              "getpriority"
              "setpriority"
          # Turn cfg.systemCallFilter into the list of entry names for the unit:
          # tag each entry with its attribute name, drop the disabled ones,
          # order the rest via sortProperties, then keep only the names.
          SystemCallFilter = pipe cfg.systemCallFilter [
            (mapAttrsToList (name: entry: entry // { inherit name; }))
            (filter (entry: entry.enable))
            sortProperties
            (map (entry: entry.name))
          ];
          UMask = if groupAccessAvailable then "0027" else "0077";
        }