Unverified Commit 02ac532d authored by Gaboardi, James's avatar Gaboardi, James
Browse files

cleaning out relic files

parent c855b36f
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 29 13:46:41 2021
@author: joe
"""
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.optimize import minimize
class pmedm:
    """P-MEDM class.

    Penalized Maximum Entropy Dasymetric Modeling: estimates occurrence
    probabilities for survey responses over small (target) geographic
    zones, subject to published constraints at two geographic levels and
    their error variances.

    Attributes:
        serial: Response IDs.
        wt: Sample weights.
        N: Total population.
        n: Total responses.
        pX: Individual-level constraints.
        Y1: Level-1 (aggregate) constraints.
        Y2: Level-2 (target) constraints.
        Y_vec: Vectorized constraints.
        V1: Level-1 (aggregate) error variances.
        V2: Level-2 (target) error variances.
        V_vec: Vectorized error variances.
        sV: Diagonal matrix of error variances.
        topo: Geographic crosswalk.
        A1: Geographic crosswalk (np-array).
        A2: Level-2 (target) identity matrix.
        X: Solution space.
        q: Occurrence probabilities.
        lam: Parameters.
        grad: Solver gradient (set by ``solve``).
        hess: Solver Hessian (set by ``solve``).
        res: Full solver result (set by ``solve`` when ``keep_solver``).
    """

    def __init__(self, serial, wt, cind, cg1, cg2, sg1, sg2, q=None,
                 lam=None, verbose=True, keep_solver=True):
        """
        Parameters
        ----------
        serial : array-like
            Response (record) IDs.
        wt : numpy.ndarray
            Sample weights, one per response.
        cind : pandas.DataFrame
            Individual-level constraints (responses x constraints).
        cg1 : pandas.DataFrame
            Level-1 (aggregate) geographic constraints.
        cg2 : pandas.DataFrame
            Level-2 (target) geographic constraints, indexed by zone ID.
        sg1 : pandas.DataFrame
            Standard errors of the level-1 constraints.
        sg2 : pandas.DataFrame
            Standard errors of the level-2 constraints.
        q : numpy.ndarray, optional
            Initial occurrence probabilities (length: responses x target
            zones). The default is None (normalized weights spread
            uniformly over target zones).
        lam : numpy.ndarray, optional
            Initial parameters (length of the vectorized constraints).
            The default is None (zero vector).
        verbose : bool
            Display solver progress output.
        keep_solver : bool
            Retain the full scipy solver result in ``self.res``.

        Returns
        -------
        None.
        """
        # Response IDs
        self.serial = serial
        # Sample weights
        self.wt = wt
        # Total population
        self.N = np.sum(self.wt)
        # Total responses
        self.n = cind.shape[0]
        # Individual constraints
        self.pX = cind.values
        # Geographic constraints, vectorized and rescaled to shares of N
        self.Y1 = cg1.values
        self.Y2 = cg2.values
        self.Y_vec = (
            np.concatenate([self.Y1.flatten("A"), self.Y2.flatten("A")]) / self.N
        )
        # Geographic constraint error variances (squared standard errors),
        # rescaled consistently with Y_vec
        self.V1 = np.square(sg1.astype("float").values)
        self.V2 = np.square(sg2.astype("float").values)
        self.V_vec = np.concatenate([self.V1.flatten("A"), self.V2.flatten("A")]) * (
            self.n / self.N**2
        )
        self.sV = np.diag(self.V_vec)
        # Geographic topology: the level-1 parent zone is taken to be the
        # first character of the level-2 zone ID -- assumes this prefix
        # convention holds for real GEOIDs; confirm with caller.
        self.topo = pd.DataFrame(
            {"g1": cg2.index.values, "g2": [str(i)[0] for i in cg2.index.values]}
        )
        # Level-1 membership indicator matrix (level-1 zones x level-2 zones)
        self.A1 = np.array(
            [1 * (self.topo.g2.values == G) for G in np.unique(self.topo.g2.values)]
        )
        self.A2 = np.identity(self.topo.shape[0])
        # Solution space: (responses * target zones) x vectorized constraints
        X1 = np.kron(np.transpose(self.pX), self.A1)
        X2 = np.kron(np.transpose(self.pX), self.A2)
        self.X = np.transpose(np.vstack((X1, X2)))
        # Initial probabilities: each weight repeated once per target zone,
        # normalized to sum to 1.
        # BUG FIX: user-supplied q/lam were previously ignored (the
        # attributes stayed unset), crashing later in solve().
        if q is None:
            self.q = np.repeat(wt, self.A1.shape[1], axis=0)
            self.q = self.q / np.sum(self.q)
        else:
            self.q = q
        # Initial parameters
        if lam is None:
            self.lam = np.zeros((len(self.Y_vec),))
        else:
            self.lam = lam
        # solver gradient
        self.grad = None
        # solver hessian
        self.hess = None
        # for storing solver results
        self.res = None
        # print output?
        self.verbose = verbose
        # keep P-MEDM solver results?
        self.keep_solver = keep_solver

    @staticmethod
    def f(lam, q, X, Y_vec, sV):
        """Objective function.

        F(lam) = Y_vec'lam + log(sum_i q_i exp(-X_i lam)) + 0.5 lam'sV lam
        """
        qXl = q * np.exp(np.matmul(-X, lam))
        lvl = lam.dot(np.matmul(sV, lam))
        F = Y_vec.dot(lam) + np.log(np.sum(qXl)) + (0.5 * lvl)
        return F

    @staticmethod
    def g(lam, q, X, Y_vec, sV):
        """Gradient of the objective with respect to lam."""
        qXl = q * np.exp(np.matmul(-X, lam))
        p = qXl / np.sum(qXl)
        Xp = np.matmul(np.transpose(X), p)
        G = Y_vec + np.matmul(sV, lam) - Xp
        return G

    @staticmethod
    def h(lam, q, X, Y_vec, sV):
        """Hessian of the objective with respect to lam."""
        qXl = q * np.exp(np.matmul(-X, lam))
        p = qXl / np.sum(qXl)
        # first term: outer product (X'p)(p'X)
        h1a = np.matmul(np.transpose(X), p)
        h1b = p.dot(X)
        h1 = h1a[:, None] * h1b
        # second term: X' diag(p) X -- sparse diagonal keeps the
        # intermediate product memory-friendly
        dp = sparse.csr_matrix(np.diag(p))
        h2 = np.matmul(X.transpose(), dp.dot(X))
        H = -h1 + h2 + sV
        return H

    def update_probabilities(self, lam):
        """Update occurrence probabilities in place from parameters ``lam``."""
        qXl = self.q * np.exp(np.matmul(-self.X, lam))
        self.q = qXl / np.sum(qXl)

    def solve(self):
        """Fit the model: minimize ``f`` and update lam, q, grad, and hess."""
        minim = minimize(
            method="trust-krylov",
            fun=self.f,
            x0=self.lam,
            jac=self.g,
            hess=self.h,
            args=(self.q, self.X, self.Y_vec, self.sV),
            options={"disp": self.verbose, "return_all": True},
        )
        # updates
        self.lam = minim.x
        self.update_probabilities(self.lam)
        self.grad = minim.jac
        self.hess = minim.hess
        if self.keep_solver:
            self.res = minim
\ No newline at end of file
......@@ -11,54 +11,67 @@ import pandas as pd
from scipy import sparse
from scipy.optimize import minimize
class pmedm:
"""P-MEDM class.
Attributes:
"""P-MEDM class.
Attributes:
serial: Response IDs.
wt: Sample weights.
N: Total population.
n: Total responses.
pX: Individual-level constraints.
Y1: Level-1 (aggregate) constraints.
Y1: Level-1 (aggregate) constraints.
Y2: Level-2 (target) constraints.
Y_vec: Vectorized constraints.
V1: Level-1 (aggregate) error variances.
V2: Level-2 (target) error variances.
V_vec: Vectorized error variances.
sV: Diagonal matrix of error variances.
topo: Geographic crosswalk.
A1: Geographic crosswalk (np-array).
topo: Geographic crosswalk.
A1: Geographic crosswalk (np-array).
A2: Level-2 (target) identity matrix.
X: Solution space.
q: Occurrence probabilities.
lam: Parameters.
"""
def __init__(self, serial, wt, cind, cg1, cg2, sg1, sg2, q = None,\
lam = None, verbose = True, keep_solver = True):
"""
def __init__(
self,
serial,
wt,
cind,
cg1,
cg2,
sg1,
sg2,
q=None,
lam=None,
verbose=True,
keep_solver=True,
):
"""
Parameters
......@@ -87,150 +100,156 @@ class pmedm:
None.
"""
# Response IDs
self.serial = serial
# Sample weights
self.wt = wt
self.wt = wt
# Total population
self.N = np.sum(self.wt)
# Total responses
self.n = cind.shape[0]
# Individual constraints
self.pX = cind.values
# Geographic constraints
# Geographic constraints
self.Y1 = cg1.values
self.Y2 = cg2.values
self.Y_vec = np.concatenate([self.Y1.flatten('A'), self.Y2.flatten('A')]) / self.N
self.Y_vec = (
np.concatenate([self.Y1.flatten("A"), self.Y2.flatten("A")]) / self.N
)
# Geographic constraint error variances
self.V1 = np.square(sg1.astype('float').values)
self.V2 = np.square(sg2.astype('float').values)
self.V1 = np.square(sg1.astype("float").values)
self.V2 = np.square(sg2.astype("float").values)
self.V_vec = np.concatenate([self.V1.flatten("A"), self.V2.flatten("A")]) * (
self.n / self.N ** 2
)
self.V_vec = np.concatenate([self.V1.flatten('A'), self.V2.flatten('A')])\
* (self.n / self.N**2)
self.sV = np.diag(self.V_vec)
# Geographic topology
self.topo = pd.DataFrame({'g1': cg2.index.values,\
'g2': [str(i)[0] for i in cg2.index.values]})
self.topo = pd.DataFrame(
{"g1": cg2.index.values, "g2": [str(i)[0] for i in cg2.index.values]}
)
self.A1 = np.array([1 * (self.topo.g2.values == G)\
for G in np.unique(self.topo.g2.values)])
self.A1 = np.array(
[1 * (self.topo.g2.values == G) for G in np.unique(self.topo.g2.values)]
)
self.A2 = np.identity(self.topo.shape[0])
# Solution space
X1 = np.kron(np.transpose(self.pX), self.A1)
X2 = np.kron(np.transpose(self.pX), self.A2)
self.X = np.transpose(np.vstack((X1, X2)))
# Initial probabilities
if q is None:
self.q = np.repeat(wt, self.A1.shape[1], axis = 0)
self.q = np.repeat(wt, self.A1.shape[1], axis=0)
self.q = self.q / np.sum(self.q)
# Initial parameters
if lam is None:
self.lam = np.zeros((len(self.Y_vec),))
# solver gradient
self.grad = None
# solver hessian
self.hess = None
# for storing solver results
self.res = None
self.res = None
# print output?
self.verbose = verbose
# keep P-MEDM solver results?
self.keep_solver = keep_solver
@staticmethod
def f(lam, q, X, Y_vec, sV):
"""Objective function
"""
"""Objective function"""
qXl = q * np.exp(np.matmul(-X, lam))
lvl = lam.dot(np.matmul(sV, lam))
lvl = lam.dot(np.matmul(sV, lam))
F = Y_vec.dot(lam) + np.log(np.sum(qXl)) + (0.5 * lvl)
return F
@staticmethod
def g(lam, q, X, Y_vec, sV):
"""Gradient function
"""
"""Gradient function"""
qXl = q * np.exp(np.matmul(-X, lam))
p = qXl / np.sum(qXl)
Xp = np.matmul(np.transpose(X), p)
G = Y_vec + np.matmul(sV, lam) - Xp
return G
@staticmethod
def h(lam, q, X, Y_vec, sV):
"""Hessian function
"""
"""Hessian function"""
qXl = q * np.exp(np.matmul(-X, lam))
p = qXl / np.sum(qXl)
# first term
h1a = np.matmul(np.transpose(X), p)
h1b = p.dot(X)
h1 = h1a[:,None] * h1b
h1 = h1a[:, None] * h1b
# second term
dp = sparse.csr_matrix(np.diag(p))
h2 = np.matmul(X.transpose(), dp.dot(X))
H = -h1 + h2 + sV
return H
def update_probabilities(self, lam):
"""Updates occurrence probabilities
"""
"""Updates occurrence probabilities"""
qXl = self.q * np.exp(np.matmul(-self.X, lam))
self.q = qXl / np.sum(qXl)
def solve(self):
minim = minimize(method = 'trust-krylov',\
fun = self.f, x0 = self.lam, jac = self.g, hess = self.h,\
args=(self.q, self.X, self.Y_vec, self.sV),\
options = {'disp' : self.verbose, 'return_all': True})
minim = minimize(
method="trust-krylov",
fun=self.f,
x0=self.lam,
jac=self.g,
hess=self.h,
args=(self.q, self.X, self.Y_vec, self.sV),
options={"disp": self.verbose, "return_all": True},
)
# updates
self.lam = minim.x
self.update_probabilities(self.lam)
self.grad = minim.jac
self.hess = minim.hess
if self.keep_solver:
self.res = minim
\ No newline at end of file
self.res = minim
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 29 18:28:37 2021
@author: joe
"""
#%%
# Toy-data driver: loads individual and block-group constraint tables and
# derives tract-level constraints by aggregating block groups.
import sys
sys.path.append('code')

import numpy as np
import pandas as pd

import pmedm
# from pmedm import pmedm, f, g, h, solve

## Individual constraints
# cind = pd.read_csv('data/toy_constraints_ind3.csv')
cind = pd.read_csv('data/toy_constraints_ind.csv')

# cbg0 = pd.read_csv('data/toy_constraints_bg3.csv')
cbg0 = pd.read_csv('data/toy_constraints_bg.csv')

## Block group constraints
cbg0.index = cbg0.GEOID.values

# Separate estimates from standard errors. Standard-error columns are the
# ones whose names end in 's' -- assumes this naming convention holds for
# all toy inputs; confirm against the data files.
se_cols = cbg0.columns[[k.endswith('s') for k in cbg0.columns]]
est_cols = cbg0.columns[
    [(k not in se_cols) and (k != 'GEOID') for k in cbg0.columns]
]
cbg = cbg0[est_cols]
sbg = cbg0[se_cols]

## Tract constraints
# The tract ID is taken to be the first character of the block-group GEOID.
ctrt0 = cbg0.copy()
ctrt0.index = [str(i)[0] for i in ctrt0.GEOID.values]
ctrt0 = ctrt0.drop('GEOID', axis=1)

# separate ests and standard errors
ctrt = ctrt0[est_cols]
strt = ctrt0[se_cols]

# aggregate estimates: sum block groups within each tract
ctrt = ctrt.groupby(ctrt.index).sum()