Commit 46cf6a17 authored by Devanshu Agrawal

Add torch experiments.

parent 0ad431ac
datasets.py +26 −0
import torch
from torch.utils.data import Dataset
from sklearn import datasets


class Boston(Dataset):
	"""Boston housing regression data wrapped as a torch Dataset."""

	def __init__(self, dtype=torch.float64, standardize=False):
		self.dtype = dtype
		self.load_data(standardize=standardize)

	def __len__(self):
		return len(self.data)

	def load_data(self, standardize):
		# note: load_boston was removed in scikit-learn 1.2, so this requires an older sklearn
		self.data, self.labels = datasets.load_boston(return_X_y=True)
		self.data = torch.from_numpy(self.data).to(dtype=self.dtype)
		self.labels = torch.from_numpy(self.labels).to(dtype=self.dtype).view(-1, 1)

		# standardize features and targets to zero mean and unit variance
		if standardize:
			self.data = (self.data - torch.mean(self.data, dim=0, keepdim=True)) / torch.std(self.data, dim=0, keepdim=True, unbiased=False)
			self.labels = (self.labels - torch.mean(self.labels, dim=0, keepdim=True)) / torch.std(self.labels, dim=0, keepdim=True, unbiased=False)


	def __getitem__(self, idx):
		return self.data[idx], self.labels[idx]
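
For reference, a minimal sketch of how this Dataset is consumed (the experiment script below loads it as one full batch; this assumes an sklearn version that still ships load_boston):

# usage sketch (illustrative, not part of the commit)
from torch.utils.data import DataLoader
from datasets import Boston

dataset = Boston(standardize=True)
loader = DataLoader(dataset, batch_size=len(dataset))
X, Y = next(iter(loader))
print(X.shape, Y.shape)  # torch.Size([506, 13]) torch.Size([506, 1])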
+105 −0
import os
import json
import time
import argparse
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from modules import BottleneckNNGP
import datasets

# fix the torch seed for reproducibility
torch.manual_seed(1)

# command-line arguments
parser = argparse.ArgumentParser()
# data
parser.add_argument("--dataset", "-ds", required=True, help="Dataset. One of: boston, iris, rings.")
# architecture
parser.add_argument("--depths", "-d", type=lambda s: [int(i) for i in s.split(",")], default="0", help="List of depths (number of hidden layers) of the NNGP components in comma-separated format.")
parser.add_argument("--widths", "-w", type=lambda s: [int(i) for i in s.split(",")], default="", help="List of bottleneck widths in comma-separated format.")
# initial variance hyperparameters
parser.add_argument("--vb", "-vb", default=1.0, type=float, help="Initial value of hyperparameter v_b.")
parser.add_argument("--vw", "-vw", default=1.0, type=float, help="Initial value of hyperparameter v_w.")
parser.add_argument("--vn", "-vn", default=1.0, type=float, help="Initial value of hyperparameter v_n.")
# number of samples
parser.add_argument("--train_samples", "-s", default=100, type=int, help="Number of MC samples from bottlenecks during training.")
parser.add_argument("--test_samples", "-t", default=100, type=int, help="Number of MC samples from bottlenecks at test time.")
# optimization
parser.add_argument("--iters", "-i", default=10, type=int, help="Number of training iterations.")

args = parser.parse_args()


# load the dataset as one full batch; the flag maps to a class, e.g. "boston" -> datasets.Boston
dataset = getattr(datasets, args.dataset.capitalize())(standardize=True)
dataloader = DataLoader(dataset, batch_size=len(dataset))
X, Y = next(iter(dataloader))

# get model
model = BottleneckNNGP(depths=args.depths, widths=args.widths, v_b=args.vb, v_w=args.vw, v_n=args.vn)

# get optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-1)

# initialize results dict
results = {"train": {"loss": []}}
train_results_keys = ["loss", "v_b", "v_w", "v_n"]
# record the initial hyperparameter values (loss has no value before the first update)
for (key, value) in zip(train_results_keys[1:], [model.v_b, model.v_w, model.v_n]):
	results["train"][key] = [value.item()]

# training loop
print("Training ...")
time_0 = time.time()
for i in range(args.iters):
	# compute loss
	Ks = model(X, num_samples=args.train_samples)
	loss = -model.log_likelihood(Ks, Y)
	# optim step
	optimizer.zero_grad()
	loss.backward()
	optimizer.step()
	# update results
	for (key, value) in zip(train_results_keys, [loss, model.v_b, model.v_w, model.v_n]):
		results["train"][key].append( value.item() )
	# print loss
	print("iter {} loss: {:.3f}".format(i, loss.item()))

# compute loss after the final update (no gradients needed)
with torch.no_grad():
	Ks = model(X, num_samples=args.train_samples)
	loss = -model.log_likelihood(Ks, Y)
results["train"]["loss"].append(loss.item())
print("iter {} loss: {:.3f}".format(args.iters, loss.item()))

# record training time
results["train"]["time"] = time.time()-time_0

# get final values with a higher number of MC samples
time_0 = time.time()
with torch.no_grad():
	Ks = model(X, num_samples=args.test_samples)
	loss = -model.log_likelihood(Ks, Y)
results["test"] = {}
for (key, value) in zip(train_results_keys, [loss, model.v_b, model.v_w, model.v_n]):
	results["test"][key] = value.item()

# record test time
results["test"]["time"] = time.time()-time_0

# print final values
print("Final values:")
for key in train_results_keys:
	print(key+": {:.3f}".format(results["test"][key]))

# record command-line arguments
results["args"] = dict(vars(args))

# save results
print("Saving results ...")
out_dir = os.path.join("results", args.dataset)
filename = "depths" + "_".join(map(str, args.depths)) + "_widths" + "_".join(map(str, args.widths)) + ".json"
os.makedirs(out_dir, exist_ok=True)
with open(os.path.join(out_dir, filename), "w") as fp:
	json.dump(results, fp, indent=2)


print("Done!")
modules.py +57 −0
import math
import torch
import torch.nn as nn

class BottleneckNNGP(nn.Module):
	"""Deep NNGP with finite-width bottleneck layers sampled by Monte Carlo."""

	def __init__(self, depths=(0,), widths=(), v_b=1.0, v_w=1.0, v_n=1.0, jitter=1e-10, dtype=torch.float64):
		super().__init__()
		self.depths = depths
		self.widths = widths
		self.jitter = jitter
		self.dtype = dtype

		# variance hyperparameters: bias (v_b), weight (v_w), and observation noise (v_n)
		self.v_b = nn.Parameter(data=torch.tensor([v_b], dtype=self.dtype), requires_grad=True)
		self.v_w = nn.Parameter(data=torch.tensor([v_w], dtype=self.dtype), requires_grad=True)
		self.v_n = nn.Parameter(data=torch.tensor([v_n], dtype=self.dtype), requires_grad=True)


	def forward(self, x, num_samples=100):
		# initial gram matrix of the inputs, with a leading MC-sample dimension
		gram_matrices = (x @ x.t()).unsqueeze(0)

		# propagate through all but the last NNGP component, drawing MC samples
		# of the finite-width bottleneck activations between components
		for (depth, width) in zip(self.depths[:-1], self.widths):
			Ks = self.K(gram_matrices, depth, self.jitter)
			Ls = torch.linalg.cholesky(Ks)
			samples = torch.randn(num_samples, x.shape[0], width, dtype=self.dtype)
			samples = torch.matmul(Ls, samples)
			samples = math.sqrt(2.0) * nn.functional.relu(samples)
			gram_matrices = torch.matmul(samples, torch.transpose(samples, 1, 2)) / width

		# the last NNGP component adds the observation-noise variance v_n
		Ks = self.K(gram_matrices, self.depths[-1], self.v_n)
		return Ks


	def K(self, gram_matrices, depth, noise):
		# arc-cosine (ReLU) kernel recursion, applied depth times
		K_XX = self.v_b + self.v_w * gram_matrices
		for _ in range(depth):
			# A[s, i, j] = sqrt(K[s, i, i] * K[s, j, j]); R is the clamped correlation
			A = torch.sqrt(torch.einsum("ijj, ikk->ijk", K_XX, K_XX))
			R = torch.clamp(K_XX / A, -1 + self.jitter, 1 - self.jitter)
			K_XX = self.v_b + self.v_w * A / math.pi * (torch.sqrt(1 - R ** 2) + (math.pi - torch.acos(R)) * R)
		return K_XX + noise * torch.eye(K_XX.size(1), dtype=self.dtype).unsqueeze(0)


	def log_likelihood(self, Ks, y):
		# Gaussian log marginal likelihood for each MC kernel sample,
		# followed by a log-mean-exp over samples
		Ls = torch.linalg.cholesky(Ks)
		LYs = torch.linalg.solve_triangular(Ls, y, upper=False)
		logdets = 2 * torch.sum(torch.log(torch.diagonal(Ls, dim1=1, dim2=2)), 1)
		ll = torch.logsumexp(
			-0.5 * (
				torch.sum(torch.sum(LYs ** 2, 1), 1)
				+ y.shape[1] * (
					logdets + y.shape[0] * math.log(2 * math.pi)
				)
			),
			0,
		) - math.log(Ks.size(0))

		return ll
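
A minimal sketch of the module in use, mirroring the experiment script (the data and shapes here are illustrative, not from the commit):

# usage sketch (illustrative, not part of the commit)
import torch
from modules import BottleneckNNGP

X = torch.randn(50, 13, dtype=torch.float64)  # 50 points, 13 features
Y = torch.randn(50, 1, dtype=torch.float64)

# two NNGP components of depth 2 joined by a single width-4 bottleneck
model = BottleneckNNGP(depths=[2, 2], widths=[4])
Ks = model(X, num_samples=10)        # (10, 50, 50): one kernel matrix per MC sample
loss = -model.log_likelihood(Ks, Y)  # negative log marginal likelihood
loss.backward()                      # gradients flow to v_b, v_w, v_n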