torch_experiments/main.py  +8 −5

@@ -26,6 +26,7 @@ parser.add_argument('--vn','-vn', default=1.0, type=float, help="Initial value o
 parser.add_argument("--train_samples", "-s", default=100, type=int, help="Number of MC samples from bottlenecks during training.")
 parser.add_argument("--test_samples", "-t", default=100, type=int, help="Number of MC samples from bottlenecks at test time.")
 # optimization
+parser.add_argument('--lr','-l', default=1e-3, type=float, help="Learning rate.")
 parser.add_argument("--iters", "-i", default=10, type=int, help="Number of training iterations.")
 # GPU
 parser.add_argument("--gpu", "-g", default=-1, type=int, help="Which GPU to use. If negative, CPU is used.")

@@ -43,12 +44,13 @@ X, Y = next(iter(dataloader))
 model = BottleneckNNGP(depths=args.depths, widths=args.widths, v_b=args.vb, v_w=args.vw, v_n=args.vn, device=device)
 # get optimizer
-optimizer = optim.Adam(model.parameters(), lr=1e-1)
+optimizer = optim.Adam(model.parameters(), lr=args.lr)
+#scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda iter: 1/(1+iter//5))
 # initialize results dict
 results = {"train": {"loss": []}}
 train_results_keys = ["loss", "v_b", "v_w", "v_n"]
-for (key, value) in zip(train_results_keys[1:], [model.v_b, model.v_w, model.v_n]):
+for (key, value) in zip(train_results_keys[1:], [model.v_b(), model.v_w(), model.v_n()]):
     results["train"][key] = [value.item()]
 # training loop

@@ -61,8 +63,9 @@ for i in range(args.iters):
     loss.backward()
     optimizer.step()
     optimizer.zero_grad()
+    # scheduler.step()
     # update results
-    for (key, value) in zip(train_results_keys, [loss, model.v_b, model.v_w, model.v_n]):
+    for (key, value) in zip(train_results_keys, [loss, model.v_b(), model.v_w(), model.v_n()]):
         results["train"][key].append( value.item() )
     # print loss
     print("iter {} loss: {:.3f}".format(i, loss.item()))

@@ -81,7 +84,7 @@ time_0 = time.time()
 with torch.no_grad():
     loss = -model.log_likelihood(X, Y, num_samples=args.test_samples)
 results["test"] = {}
-for (key, value) in zip(train_results_keys, [loss, model.v_b, model.v_w, model.v_n]):
+for (key, value) in zip(train_results_keys, [loss, model.v_b(), model.v_w(), model.v_n()]):
     results["test"][key] = value.item()
 # record test time

@@ -98,7 +101,7 @@ results["args"] = dict(vars(args))
 # save results
 print("Saving results ...")
 dir = os.path.join("results", args.dataset)
-filename = "depth"+"_".join(map(str, args.depths))+"_widths"+"_".join(map(str, args.widths))+".json"
+filename = "depths"+"_".join(map(str, args.depths))+"_widths"+"_".join(map(str, args.widths))+".json"
 os.makedirs(dir, exist_ok=True)
 with open(os.path.join(dir, filename), "w") as fp:
     json.dump(results, fp, indent=2)

torch_experiments/modules.py  +20 −6

@@ -2,6 +2,20 @@ import math
 import torch
 import torch.nn as nn
+
+class PositiveParameter(nn.Module):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        inverse_softplus = lambda x: torch.log(torch.exp(x)-1.0)
+        if len(args) > 0:
+            args[0] = inverse_softplus(args[0])
+        else:
+            kwargs["data"] = inverse_softplus(kwargs["data"])
+        self.unconstrained = nn.Parameter(*args, **kwargs)
+
+    def forward(self):
+        return nn.functional.softplus(self.unconstrained)
+
 class BottleneckNNGP(nn.Module):

     def __init__(self, depths=[0], widths=[], v_b=1.0, v_w=1.0, v_n=1.0, jitter=1e-10, dtype=torch.float64, device="cpu"):

@@ -12,17 +26,17 @@ class BottleneckNNGP(nn.Module):
         self.dtype = dtype
         self.device = device
-        self.v_b = nn.Parameter(data=torch.tensor([v_b], dtype=self.dtype, device=self.device), requires_grad=True)
-        self.v_w = nn.Parameter(data=torch.tensor([v_w], dtype=self.dtype, device=self.device), requires_grad=True)
-        self.v_n = nn.Parameter(data=torch.tensor([v_n], dtype=self.dtype, device=self.device), requires_grad=True)
+        self.v_b = PositiveParameter(data=torch.tensor([v_b], dtype=self.dtype, device=self.device), requires_grad=True)
+        self.v_w = PositiveParameter(data=torch.tensor([v_w], dtype=self.dtype, device=self.device), requires_grad=True)
+        self.v_n = PositiveParameter(data=torch.tensor([v_n], dtype=self.dtype, device=self.device), requires_grad=True)

     def K(self, gram_matrices, depth, noise):
-        K_XX = self.v_b + self.v_w * gram_matrices
+        K_XX = self.v_b() + self.v_w() * gram_matrices
         for _ in range(depth):
             A = torch.sqrt(torch.einsum("ijj, ikk->ijk", K_XX, K_XX))
             R = torch.clamp(K_XX/A, -1+self.jitter, 1-self.jitter)
-            K_XX = self.v_b + self.v_w * A / math.pi * (torch.sqrt(1 - R ** 2) + (math.pi - torch.acos(R)) * R)
+            K_XX = self.v_b() + self.v_w() * A / math.pi * (torch.sqrt(1 - R ** 2) + (math.pi - torch.acos(R)) * R)
         return K_XX + noise * torch.eye(K_XX.size(1), dtype=self.dtype, device=self.device).unsqueeze(0)

@@ -41,7 +55,7 @@ class BottleneckNNGP(nn.Module):
         samples = torch.tensor([2.], dtype=self.dtype, device=self.device).sqrt() * nn.functional.relu(samples)
         gram_matrices = torch.matmul(samples, torch.transpose(samples, 1, 2)) / width

-        Ks = self.K(gram_matrices, self.depths[-1], self.v_n)
+        Ks = self.K(gram_matrices, self.depths[-1], self.v_n())
         Ls = torch.cholesky(Ks)
         LYs = torch.triangular_solve(y, Ls, upper=False).solution
         logdets = 2 * torch.sum(torch.log(torch.diagonal(Ls, dim1=1, dim2=2)), 1)
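
The new PositiveParameter module keeps the variance hyperparameters v_b, v_w and v_n strictly positive during optimization: it stores an unconstrained tensor and returns softplus(unconstrained) on every call, initializing the unconstrained value through the softplus inverse log(exp(x) - 1) so the constrained value starts at exactly the requested (necessarily positive) initial value. One caveat in the version above: *args arrives as a tuple, so the positional branch args[0] = inverse_softplus(args[0]) would raise a TypeError if it were ever hit; the call sites in this diff always pass data= as a keyword, so that branch is dead code here. Below is a minimal standalone sketch of the same idea, with the positional branch rebuilt to avoid the tuple mutation and the inverse written in the numerically safer expm1 form (that form is a substitution, not what the diff ships):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    def inverse_softplus(x):
        # log(exp(x) - 1) rewritten as x + log(1 - exp(-x)); avoids overflow for large x
        return x + torch.log(-torch.expm1(-x))

    class PositiveParameter(nn.Module):
        # stores an unconstrained tensor; calling the module returns softplus(unconstrained) > 0

        def __init__(self, *args, **kwargs):
            super().__init__()
            if len(args) > 0:
                # tuples are immutable, so rebuild args instead of assigning to args[0]
                args = (inverse_softplus(args[0]),) + args[1:]
            else:
                kwargs["data"] = inverse_softplus(kwargs["data"])
            self.unconstrained = nn.Parameter(*args, **kwargs)

        def forward(self):
            return F.softplus(self.unconstrained)

    # round trip: the constrained value starts at the requested initial value
    p = PositiveParameter(data=torch.tensor([1.0], dtype=torch.float64))
    assert torch.allclose(p(), torch.tensor([1.0], dtype=torch.float64))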
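
On the main.py side, v_b, v_w and v_n are now submodules rather than raw tensors, so the results dict reads them by calling them (model.v_b(), etc.), while model.parameters() still reaches the underlying unconstrained tensors and Adam trains them at the new --lr flag's rate. A quick sanity check of that wiring, reusing the PositiveParameter sketch above (the loss and loop here are illustrative only, not the PR's training loop):

    import torch
    import torch.optim as optim

    v_n = PositiveParameter(data=torch.tensor([1.0], dtype=torch.float64))
    # parameters() finds the unconstrained tensor because PositiveParameter is an nn.Module
    optimizer = optim.Adam(v_n.parameters(), lr=1e-1)

    for _ in range(200):
        loss = (v_n() ** 2).sum()  # drive the constrained value toward zero
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    print(v_n().item())  # small, but strictly positive thanks to the softplus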