Unverified commit e6aefe20, authored by Mario Rodas and committed by GitHub
Browse files

Merge pull request #292533 from flyingcircusio/init-postgresql-anonymizer

postgresqlPackages.anonymizer: init at 1.3.1; add me & osnyx to flyingcircus team
parents 6709f487 b4f8ebd2
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -14620,6 +14620,12 @@
    githubId = 111265;
    name = "Ozan Sener";
  };
  osnyx = {
    email = "os@flyingcircus.io";
    github = "osnyx";
    githubId = 104593071;
    name = "Oliver Schmidt";
  };
  ostrolucky = {
    email = "gabriel.ostrolucky@gmail.com";
    github = "ostrolucky";
+2 −0
Original line number Diff line number Diff line
@@ -311,6 +311,8 @@ with lib.maintainers; {
      dpausp
      frlan
      leona
      osnyx
      ma27
    ];
    scope = "Team for Flying Circus employees who collectively maintain packages.";
    shortName = "Flying Circus employees";
+1 −0
Original line number Diff line number Diff line
@@ -683,6 +683,7 @@ in {
  peering-manager = handleTest ./web-apps/peering-manager.nix {};
  peertube = handleTestOn ["x86_64-linux"] ./web-apps/peertube.nix {};
  peroxide = handleTest ./peroxide.nix {};
  pg_anonymizer = handleTest ./pg_anonymizer.nix {};
  pgadmin4 = handleTest ./pgadmin4.nix {};
  pgbouncer = handleTest ./pgbouncer.nix {};
  pgjwt = handleTest ./pgjwt.nix {};
+94 −0
Original line number Diff line number Diff line
import ./make-test-python.nix ({ pkgs, lib, ... }: {
  name = "pg_anonymizer";
  meta.maintainers = lib.teams.flyingcircus.members;

  # Single test VM: PostgreSQL with the anonymizer extension installed and the
  # `anon` library preloaded, plus the pg_dump_anon CLI on the system PATH.
  nodes.machine = { pkgs, ... }: {
    services.postgresql.enable = true;
    services.postgresql.extraPlugins = exts: [ exts.anonymizer ];
    services.postgresql.settings.shared_preload_libraries = "anon";

    environment.systemPackages = [ pkgs.pg-dump-anon ];
  };

  testScript = ''
    start_all()
    # Wait for the multi-user.target and postgresql.service units before
    # running any queries against the database.
    machine.wait_for_unit("multi-user.target")
    machine.wait_for_unit("postgresql.service")

    # Create the `demo` database and load a fixture into it: the `anon`
    # extension, a `player` table with two known rows, and masking rules
    # (security labels) for the `name` and `points` columns.
    with subtest("Setup"):
        machine.succeed("sudo -u postgres psql --command 'create database demo'")
        # The SQL fixture is written to a file by Nix (pkgs.writeText) and
        # handed to psql via -f.
        machine.succeed(
            "sudo -u postgres psql -d demo -f ${pkgs.writeText "init.sql" ''
              create extension anon cascade;
              select anon.init();
              create table player(id serial, name text, points int);
              insert into player(id,name,points) values (1,'Foo', 23);
              insert into player(id,name,points) values (2,'Bar',42);
              security label for anon on column player.name is 'MASKED WITH FUNCTION anon.fake_last_name();';
              security label for anon on column player.points is 'MASKED WITH VALUE NULL';
            ''}"
        )

    def get_player_table_contents():
        """
        Query the whole `player` table of the `demo` database as CSV and
        return its data rows (header line dropped) as lists of column strings.
        """
        csv_output = machine.succeed("sudo -u postgres psql -d demo --csv --command 'select * from player'")
        data_lines = csv_output.splitlines()[1:]
        return [line.split(',') for line in data_lines]

    def check_anonymized_row(row, id, original_name):
        """
        Assert that `row` (a list of column strings: id, name, points) is an
        anonymized version of the player identified by `id`.

        The ID must be unchanged, the name must differ from `original_name`
        and the points column must be empty (a NULL is expected to show up as
        an empty field in the CSV/dump output).

        Raises AssertionError naming the offending row on any mismatch.
        """
        assert row[0] == id, f"Expected row to have ID {id}, but got {row[0]}"
        assert row[1] != original_name, f"Expected row {id} to have a name other than {original_name}"
        assert not bool(row[2]), f"Expected points to be NULL in row {id}, but got {row[2]}"

    def find_xsv_in_dump(dump, sep=','):
        r"""
        Extract the rows of the `player` table from the text of a SQL dump.

        Expecting to find a CSV (for pg_dump_anon) or TSV (for pg_dump) structure, looking like

            COPY public.player ...
            1,Shields,
            2,Salazar,
            \.

        in the given dump (the commas are tabs in case of pg_dump).
        Extract the data lines between the `COPY` header and the `\.`
        terminator and split each of them by `sep`.

        Returns a list of rows, each row being a list of column strings.
        Raises AssertionError (after printing the dump for debugging) if the
        dump contains no `COPY public.player` section at all.
        """
        from itertools import dropwhile, takewhile

        # Everything from the COPY header line onwards; empty if there is no such line.
        section = list(dropwhile(
            lambda x: not x.startswith("COPY public.player"),
            dump.splitlines()
        ))
        if not section:
            # Fail here with the dump visible instead of letting callers hit
            # an unexplained IndexError on the empty result.
            print(f"Dump to process: {dump}")
            raise AssertionError("No COPY public.player section found in dump")

        # Skip the header itself and stop at the end-of-data marker.
        return [x.split(sep) for x in takewhile(lambda x: x != "\\.", section[1:])]

    def check_original_data(output):
        """
        Assert that `output` (a list of rows, each a list of column strings)
        starts with the two unmodified fixture rows 1,Foo,23 and 2,Bar,42.

        Raises AssertionError naming the offending row on any mismatch, or if
        fewer than two rows are present.
        """
        assert len(output) >= 2, f"Expected at least two rows from player table; got {output}"
        assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}"
        assert output[1] == ['2','Bar','42'], f"Expected second row from player table to be 2,Bar,42; got {output[1]}"

    def check_anonymized_rows(output):
        """
        Assert that the first two rows of `output` are anonymized versions of
        the fixture players (ID 1 formerly named Foo, ID 2 formerly named Bar).
        """
        fixture_players = (('1', 'Foo'), ('2', 'Bar'))
        for position, (player_id, former_name) in enumerate(fixture_players):
            check_anonymized_row(output[position], player_id, former_name)

    with subtest("Check initial state"):
        # Before anything is anonymized the table must hold the fixture rows.
        initial_rows = get_player_table_contents()
        check_original_data(initial_rows)

    with subtest("Anonymous dumps"):
        # A regular pg_dump must still expose the original data (tab-separated COPY section).
        plain_dump = machine.succeed("sudo -u postgres pg_dump demo")
        check_original_data(find_xsv_in_dump(plain_dump, sep='\t'))

        # pg_dump_anon must emit masked data (comma-separated COPY section).
        anon_dump = machine.succeed("sudo -u postgres pg_dump_anon -U postgres -h /run/postgresql -d demo")
        check_anonymized_rows(find_xsv_in_dump(anon_dump, sep=','))

    with subtest("Anonymize"):
        # Anonymize the stored data itself, then re-read the table to confirm it changed.
        machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'")
        anonymized_rows = get_player_table_contents()
        check_anonymized_rows(anonymized_rows)
  '';
})
+32 −0
Original line number Diff line number Diff line
# Package for the `pg_dump_anon` command line tool, built from the
# `pg_dump_anon` subdirectory of the postgresql_anonymizer repository.
{ lib, fetchFromGitLab, buildGoModule, nixosTests, postgresql, makeWrapper }:

buildGoModule rec {
  pname = "pg-dump-anon";
  version = "1.3.1";

  src = fetchFromGitLab {
    owner = "dalibo";
    repo = "postgresql_anonymizer";
    rev = version;
    hash = "sha256-Z5Oz/cIYDxFUZwQijRk4xAOUdOK0LWR+px8WOcs+Rs0=";
  };

  # Build only the Go sources located in the pg_dump_anon subdirectory.
  sourceRoot = "${src.name}/pg_dump_anon";
  vendorHash = "sha256-CwU1zoIayxvfnGL9kPdummPJiV+ECfSz4+q6gZGb8pw=";

  nativeBuildInputs = [ makeWrapper ];

  # Put the PostgreSQL binaries on the wrapped program's PATH
  # (presumably pg_dump_anon invokes the PostgreSQL client tools; confirm upstream).
  postInstall = ''
    wrapProgram $out/bin/pg_dump_anon \
      --prefix PATH : ${lib.makeBinPath [ postgresql ]}
  '';

  # Run the NixOS VM test as part of this package's tests.
  passthru.tests.pg_anonymizer = nixosTests.pg_anonymizer;

  meta = {
    description = "Export databases with data being anonymized with the anonymizer extension";
    homepage = "https://postgresql-anonymizer.readthedocs.io/en/stable/";
    license = lib.licenses.postgresql;
    maintainers = lib.teams.flyingcircus.members;
    mainProgram = "pg_dump_anon";
  };
}
Loading