Commit 33ce78b4 authored by Devanshu Agrawal's avatar Devanshu Agrawal
Browse files

Add and update multidepth scripts.

parent eec0dd9a
Loading
Loading
Loading
Loading
+74 −0
Original line number Diff line number Diff line
import os
import argparse
import time
import pickle
import random
import itertools
import numpy as np
import pandas as pd
from sklearn import datasets
import models

# Seed both RNG sources used below so the Monte-Carlo likelihood
# estimates are reproducible across runs.
random.seed(123)
np.random.seed(456)

#parser = argparse.ArgumentParser(description="Convergence of bottleneck NNGP to NNGP in log-likelihood.")
#parser.add_argument("--depths", dest="depths", type=lambda s: [int(i) for i in s.split(",")], default="1,1", help="comma-separated depths of bottleneck components.")
#args = parser.parse_args()

# get initial time (used at the end to report total wall-clock duration)
t_0 = time.time()

# load Boston data
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — this script requires an older pinned sklearn; verify the environment.
X, Y = datasets.load_boston(return_X_y=True)
Y = Y.reshape((-1, 1))  # targets as a column vector of shape (n, 1)
#Y = np.tile(Y, [1, 2])

# take smaller sample
#indices = np.arange(X.shape[0])
#np.random.shuffle(indices)
#X = X[indices[:200]]
#Y = Y[indices[:200]]

# standardize
# Z-score each feature column and the target column (zero mean, unit variance).
X = (X - np.mean(X, axis=0, keepdims=True))/np.std(X, axis=0, keepdims=True)
Y = (Y - np.mean(Y, axis=0, keepdims=True))/np.std(Y, axis=0, keepdims=True)

# Experiment configuration.
n_samples = 1000  # Monte-Carlo samples per likelihood estimate

# Finite widths 1..8 plus powers of two 16..512, ending with the
# infinite-width (exact NNGP) limit.
widths = [*range(1, 9), *(2 ** k for k in range(4, 10)), np.inf]
depths = [1, 100]  # depths of the two bottleneck components
collect_depths = [d for d in range(1, 100)]  # depths at which likelihoods are recorded

# Prior variance hyperparameters: bias, weight, and observation noise.
v_b = 0.09
v_w = 1.1
v_n = 1e-4

# Estimate the per-datum log-likelihood at every collected depth, once per width.
liks = []
for width in widths:
	print("width:", width)
	if width == np.inf:
		# Infinite width: collapse the bottleneck stack into a single deep
		# component (total depth plus one layer per intermediate bottleneck).
		ds = [sum(depths)+len(depths)-1]
		ws = []
		# Shift the collection depths so they address the same layers as in
		# the finite-width multi-component network.
		cds = [d+sum(depths[:-1])+len(depths[:-1]) for d in collect_depths]
	else:
		ds = depths
		ws = [width]*(len(depths)-1)
		cds = collect_depths
	prior = models.bottleneck_nngp_prior(X, output_dims=Y.shape[1], v_b=v_b, v_w=v_w, v_n=v_n, depths=ds, widths=ws, bottleneck_activation=True)
	# BUG FIX: pass cds, not collect_depths. Previously cds was computed but
	# never used, so the infinite-width run collected likelihoods at the
	# unshifted (misaligned) depths.
	lik = prior.log_likelihood_multidepth(Y, n_samples=n_samples, collect_depths=cds)
	liks.append(lik)

# Convert to arrays; normalise log-likelihoods by the number of data points.
widths = np.asarray(widths)
collect_depths = np.asarray(collect_depths)
liks = np.asarray(liks) / X.shape[0]

# Persist the raw arrays as a .npz archive named after the depth configuration.
out_dir = "results/boston_multidepth"
os.makedirs(out_dir, exist_ok=True)
tag = "-".join(str(d) for d in depths)
filename = f"{out_dir}/depth{tag}.npz"
np.savez(filename, widths=widths, depths=collect_depths, liks=liks)

# Also write a human-readable CSV: first column is the width, the remaining
# columns hold the (rounded) likelihood at each collected depth.
table = np.column_stack((widths, np.round(liks, 3)))
column_names = ["width"] + [f"depth_{d}" for d in collect_depths]
frame = pd.DataFrame(table, columns=column_names)
filename = filename[:-4] + ".csv"
frame.to_csv(filename, index=False)

print("Done!")
print("Took", np.round(time.time() - t_0, 1), "s")
+5 −3
Original line number Diff line number Diff line
@@ -41,9 +41,11 @@ Y = Y + np.random.normal(0, 1e-2, size=Y.shape)


n_samples = 10**3
widths = [1, 2, 3, 4, 5, 10, 50, 100, 500, np.inf]
depths = [1, 100]
collect_depths = [1, 2, 3, 4, 5, 10, 50, 100]
widths = list(range(1, 9)) + [2**n for n in range(4, 12)] + [np.inf]
widths = widths + list(range(10,110,10))
widths.sort()
depths = [1, 128]
collect_depths = list(range(1, 129))  # list(range(1, 9)) + [2**n for n in range(4, 8)]

# variance hyperparameters
v_b = 0.09 # 1.0