Unverified Commit 02ac532d authored by Gaboardi, James's avatar Gaboardi, James
Browse files

cleaning out relic files

parent c855b36f
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 29 13:46:41 2021
@author: joe
"""
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.optimize import minimize
class pmedm:
    """P-MEDM class: penalized maximum-entropy dasymetric model solver.

    Estimates occurrence probabilities for survey responses across small
    geographic areas by minimizing a penalized entropy objective subject to
    aggregate (level-1) and target (level-2) constraints.

    Attributes:
        serial: Response IDs.
        wt: Sample weights.
        N: Total population (sum of sample weights).
        n: Total responses.
        pX: Individual-level constraints.
        Y1: Level-1 (aggregate) constraints.
        Y2: Level-2 (target) constraints.
        Y_vec: Vectorized constraints (scaled by 1/N).
        V1: Level-1 (aggregate) error variances.
        V2: Level-2 (target) error variances.
        V_vec: Vectorized error variances (scaled by n/N^2).
        sV: Diagonal matrix of error variances.
        topo: Geographic crosswalk.
        A1: Geographic crosswalk (np-array).
        A2: Level-2 (target) identity matrix.
        X: Solution space.
        q: Occurrence probabilities.
        lam: Parameters.
        grad: Solver gradient at the solution (set by solve()).
        hess: Solver Hessian at the solution, if reported (set by solve()).
        res: Full solver result, if keep_solver is True (set by solve()).
        verbose: Whether the solver prints progress.
        keep_solver: Whether to retain the full solver result.
    """

    def __init__(self, serial, wt, cind, cg1, cg2, sg1, sg2, q=None,
                 lam=None, verbose=True, keep_solver=True):
        """
        Parameters
        ----------
        serial : array-like
            Response IDs.
        wt : array-like
            Sample weights, one per response.
        cind : pandas.DataFrame
            Individual-level constraints (responses x variables).
        cg1 : pandas.DataFrame
            Level-1 (aggregate) geographic constraints.
        cg2 : pandas.DataFrame
            Level-2 (target) geographic constraints; its index supplies the
            zone IDs used to build the geographic crosswalk.
        sg1 : pandas.DataFrame
            Standard errors of the level-1 constraints.
        sg2 : pandas.DataFrame
            Standard errors of the level-2 constraints.
        q : array-like, optional
            Initial occurrence probabilities. Defaults to the sample weights
            replicated over level-1 zones and normalized to sum to 1.
        lam : array-like, optional
            Initial parameters. Defaults to a zero vector matching Y_vec.
        verbose : bool, optional
            Print solver progress. The default is True.
        keep_solver : bool, optional
            Retain the full solver result in ``res``. The default is True.

        Returns
        -------
        None.
        """
        # Response IDs
        self.serial = serial
        # Sample weights
        self.wt = wt
        # Total population
        self.N = np.sum(self.wt)
        # Total responses
        self.n = cind.shape[0]
        # Individual constraints
        self.pX = cind.values
        # Geographic constraints, vectorized and scaled by the population
        self.Y1 = cg1.values
        self.Y2 = cg2.values
        self.Y_vec = (
            np.concatenate([self.Y1.flatten('A'), self.Y2.flatten('A')]) / self.N
        )
        # Geographic constraint error variances (squared standard errors),
        # scaled by n / N^2
        self.V1 = np.square(sg1.astype('float').values)
        self.V2 = np.square(sg2.astype('float').values)
        self.V_vec = (
            np.concatenate([self.V1.flatten('A'), self.V2.flatten('A')])
            * (self.n / self.N ** 2)
        )
        self.sV = np.diag(self.V_vec)
        # Geographic topology: the level-1 zone of each level-2 zone is taken
        # to be the first character of its ID -- assumes this ID convention
        # holds for the caller's data (TODO confirm)
        self.topo = pd.DataFrame(
            {'g1': cg2.index.values,
             'g2': [str(i)[0] for i in cg2.index.values]}
        )
        # One indicator row per level-1 zone marking its level-2 members
        self.A1 = np.array(
            [1 * (self.topo.g2.values == G)
             for G in np.unique(self.topo.g2.values)]
        )
        self.A2 = np.identity(self.topo.shape[0])
        # Solution space: Kronecker expansion of individual constraints over
        # both geographic levels
        X1 = np.kron(np.transpose(self.pX), self.A1)
        X2 = np.kron(np.transpose(self.pX), self.A2)
        self.X = np.transpose(np.vstack((X1, X2)))
        # Initial probabilities.
        # BUG FIX: previously a user-supplied ``q`` was silently discarded
        # (self.q was only assigned inside the ``if q is None`` branch,
        # causing AttributeError in solve()).
        if q is None:
            q = np.repeat(wt, self.A1.shape[1], axis=0)
            q = q / np.sum(q)
        self.q = q
        # Initial parameters.
        # BUG FIX: same issue as ``q`` -- a user-supplied ``lam`` was dropped.
        if lam is None:
            lam = np.zeros((len(self.Y_vec),))
        self.lam = lam
        # solver gradient
        self.grad = None
        # solver hessian
        self.hess = None
        # for storing solver results
        self.res = None
        # print output?
        self.verbose = verbose
        # keep P-MEDM solver results?
        self.keep_solver = keep_solver

    @staticmethod
    def f(lam, q, X, Y_vec, sV):
        """Objective function: negative penalized entropy to minimize."""
        qXl = q * np.exp(np.matmul(-X, lam))
        # quadratic penalty term lam' sV lam
        lvl = lam.dot(np.matmul(sV, lam))
        F = Y_vec.dot(lam) + np.log(np.sum(qXl)) + (0.5 * lvl)
        return F

    @staticmethod
    def g(lam, q, X, Y_vec, sV):
        """Gradient of the objective with respect to lam."""
        qXl = q * np.exp(np.matmul(-X, lam))
        p = qXl / np.sum(qXl)
        Xp = np.matmul(np.transpose(X), p)
        G = Y_vec + np.matmul(sV, lam) - Xp
        return G

    @staticmethod
    def h(lam, q, X, Y_vec, sV):
        """Hessian of the objective with respect to lam."""
        qXl = q * np.exp(np.matmul(-X, lam))
        p = qXl / np.sum(qXl)
        # first term: outer product (X'p)(p'X)
        h1a = np.matmul(np.transpose(X), p)
        h1b = p.dot(X)
        h1 = h1a[:, None] * h1b
        # second term: X' diag(p) X, via a sparse diagonal for efficiency
        dp = sparse.csr_matrix(np.diag(p))
        h2 = np.matmul(X.transpose(), dp.dot(X))
        H = -h1 + h2 + sV
        return H

    def update_probabilities(self, lam):
        """Updates occurrence probabilities from the current parameters."""
        qXl = self.q * np.exp(np.matmul(-self.X, lam))
        self.q = qXl / np.sum(qXl)

    def solve(self):
        """Fits the model by trust-region Krylov minimization and updates
        lam, q, grad, hess (and res, if keep_solver is True) in place."""
        minim = minimize(
            method='trust-krylov',
            fun=self.f,
            x0=self.lam,
            jac=self.g,
            hess=self.h,
            args=(self.q, self.X, self.Y_vec, self.sV),
            options={'disp': self.verbose, 'return_all': True},
        )
        # updates
        self.lam = minim.x
        self.update_probabilities(self.lam)
        self.grad = minim.jac
        # NOTE(review): 'hess' is not guaranteed on OptimizeResult for every
        # SciPy version/method; fall back to None instead of raising.
        self.hess = getattr(minim, 'hess', None)
        if self.keep_solver:
            self.res = minim
\ No newline at end of file
...@@ -11,54 +11,67 @@ import pandas as pd ...@@ -11,54 +11,67 @@ import pandas as pd
from scipy import sparse from scipy import sparse
from scipy.optimize import minimize from scipy.optimize import minimize
class pmedm: class pmedm:
"""P-MEDM class. """P-MEDM class.
Attributes: Attributes:
serial: Response IDs. serial: Response IDs.
wt: Sample weights. wt: Sample weights.
N: Total population. N: Total population.
n: Total responses. n: Total responses.
pX: Individual-level constraints. pX: Individual-level constraints.
Y1: Level-1 (aggregate) constraints. Y1: Level-1 (aggregate) constraints.
Y2: Level-2 (target) constraints. Y2: Level-2 (target) constraints.
Y_vec: Vectorized constraints. Y_vec: Vectorized constraints.
V1: Level-1 (aggregate) error variances. V1: Level-1 (aggregate) error variances.
V2: Level-2 (target) error variances. V2: Level-2 (target) error variances.
V_vec: Vectorized error variances. V_vec: Vectorized error variances.
sV: Diagonal matrix of error variances. sV: Diagonal matrix of error variances.
topo: Geographic crosswalk. topo: Geographic crosswalk.
A1: Geographic crosswalk (np-array). A1: Geographic crosswalk (np-array).
A2: Level-2 (target) identity matrix. A2: Level-2 (target) identity matrix.
X: Solution space. X: Solution space.
q: Occurrence probabilities. q: Occurrence probabilities.
lam: Parameters. lam: Parameters.
""" """
def __init__(self, serial, wt, cind, cg1, cg2, sg1, sg2, q = None,\ def __init__(
lam = None, verbose = True, keep_solver = True): self,
serial,
wt,
cind,
cg1,
cg2,
sg1,
sg2,
q=None,
lam=None,
verbose=True,
keep_solver=True,
):
""" """
Parameters Parameters
...@@ -87,150 +100,156 @@ class pmedm: ...@@ -87,150 +100,156 @@ class pmedm:
None. None.
""" """
# Response IDs # Response IDs
self.serial = serial self.serial = serial
# Sample weights # Sample weights
self.wt = wt self.wt = wt
# Total population # Total population
self.N = np.sum(self.wt) self.N = np.sum(self.wt)
# Total responses # Total responses
self.n = cind.shape[0] self.n = cind.shape[0]
# Individual constraints # Individual constraints
self.pX = cind.values self.pX = cind.values
# Geographic constraints # Geographic constraints
self.Y1 = cg1.values self.Y1 = cg1.values
self.Y2 = cg2.values self.Y2 = cg2.values
self.Y_vec = np.concatenate([self.Y1.flatten('A'), self.Y2.flatten('A')]) / self.N self.Y_vec = (
np.concatenate([self.Y1.flatten("A"), self.Y2.flatten("A")]) / self.N
)
# Geographic constraint error variances # Geographic constraint error variances
self.V1 = np.square(sg1.astype('float').values) self.V1 = np.square(sg1.astype("float").values)
self.V2 = np.square(sg2.astype('float').values) self.V2 = np.square(sg2.astype("float").values)
self.V_vec = np.concatenate([self.V1.flatten("A"), self.V2.flatten("A")]) * (
self.n / self.N ** 2
)
self.V_vec = np.concatenate([self.V1.flatten('A'), self.V2.flatten('A')])\
* (self.n / self.N**2)
self.sV = np.diag(self.V_vec) self.sV = np.diag(self.V_vec)
# Geographic topology # Geographic topology
self.topo = pd.DataFrame({'g1': cg2.index.values,\ self.topo = pd.DataFrame(
'g2': [str(i)[0] for i in cg2.index.values]}) {"g1": cg2.index.values, "g2": [str(i)[0] for i in cg2.index.values]}
)
self.A1 = np.array([1 * (self.topo.g2.values == G)\ self.A1 = np.array(
for G in np.unique(self.topo.g2.values)]) [1 * (self.topo.g2.values == G) for G in np.unique(self.topo.g2.values)]
)
self.A2 = np.identity(self.topo.shape[0]) self.A2 = np.identity(self.topo.shape[0])
# Solution space # Solution space
X1 = np.kron(np.transpose(self.pX), self.A1) X1 = np.kron(np.transpose(self.pX), self.A1)
X2 = np.kron(np.transpose(self.pX), self.A2) X2 = np.kron(np.transpose(self.pX), self.A2)
self.X = np.transpose(np.vstack((X1, X2))) self.X = np.transpose(np.vstack((X1, X2)))
# Initial probabilities # Initial probabilities
if q is None: if q is None:
self.q = np.repeat(wt, self.A1.shape[1], axis = 0) self.q = np.repeat(wt, self.A1.shape[1], axis=0)
self.q = self.q / np.sum(self.q) self.q = self.q / np.sum(self.q)
# Initial parameters # Initial parameters
if lam is None: if lam is None:
self.lam = np.zeros((len(self.Y_vec),)) self.lam = np.zeros((len(self.Y_vec),))
# solver gradient # solver gradient
self.grad = None self.grad = None
# solver hessian # solver hessian
self.hess = None self.hess = None
# for storing solver results # for storing solver results
self.res = None self.res = None
# print output? # print output?
self.verbose = verbose self.verbose = verbose
# keep P-MEDM solver results? # keep P-MEDM solver results?
self.keep_solver = keep_solver self.keep_solver = keep_solver
@staticmethod @staticmethod
def f(lam, q, X, Y_vec, sV): def f(lam, q, X, Y_vec, sV):
"""Objective function """Objective function"""
"""
qXl = q * np.exp(np.matmul(-X, lam)) qXl = q * np.exp(np.matmul(-X, lam))
lvl = lam.dot(np.matmul(sV, lam)) lvl = lam.dot(np.matmul(sV, lam))
F = Y_vec.dot(lam) + np.log(np.sum(qXl)) + (0.5 * lvl) F = Y_vec.dot(lam) + np.log(np.sum(qXl)) + (0.5 * lvl)
return F return F
@staticmethod @staticmethod
def g(lam, q, X, Y_vec, sV): def g(lam, q, X, Y_vec, sV):
"""Gradient function """Gradient function"""
"""
qXl = q * np.exp(np.matmul(-X, lam)) qXl = q * np.exp(np.matmul(-X, lam))
p = qXl / np.sum(qXl) p = qXl / np.sum(qXl)
Xp = np.matmul(np.transpose(X), p) Xp = np.matmul(np.transpose(X), p)
G = Y_vec + np.matmul(sV, lam) - Xp G = Y_vec + np.matmul(sV, lam) - Xp
return G return G
@staticmethod @staticmethod
def h(lam, q, X, Y_vec, sV): def h(lam, q, X, Y_vec, sV):
"""Hessian function """Hessian function"""
"""
qXl = q * np.exp(np.matmul(-X, lam)) qXl = q * np.exp(np.matmul(-X, lam))
p = qXl / np.sum(qXl) p = qXl / np.sum(qXl)
# first term # first term
h1a = np.matmul(np.transpose(X), p) h1a = np.matmul(np.transpose(X), p)
h1b = p.dot(X) h1b = p.dot(X)
h1 = h1a[:,None] * h1b h1 = h1a[:, None] * h1b
# second term # second term
dp = sparse.csr_matrix(np.diag(p)) dp = sparse.csr_matrix(np.diag(p))
h2 = np.matmul(X.transpose(), dp.dot(X)) h2 = np.matmul(X.transpose(), dp.dot(X))
H = -h1 + h2 + sV H = -h1 + h2 + sV
return H return H
def update_probabilities(self, lam): def update_probabilities(self, lam):
"""Updates occurrence probabilities """Updates occurrence probabilities"""
"""
qXl = self.q * np.exp(np.matmul(-self.X, lam)) qXl = self.q * np.exp(np.matmul(-self.X, lam))
self.q = qXl / np.sum(qXl) self.q = qXl / np.sum(qXl)
def solve(self): def solve(self):
minim = minimize(method = 'trust-krylov',\ minim = minimize(
fun = self.f, x0 = self.lam, jac = self.g, hess = self.h,\ method="trust-krylov",
args=(self.q, self.X, self.Y_vec, self.sV),\ fun=self.f,
options = {'disp' : self.verbose, 'return_all': True}) x0=self.lam,
jac=self.g,
hess=self.h,
args=(self.q, self.X, self.Y_vec, self.sV),
options={"disp": self.verbose, "return_all": True},
)
# updates # updates
self.lam = minim.x self.lam = minim.x
self.update_probabilities(self.lam) self.update_probabilities(self.lam)
self.grad = minim.jac self.grad = minim.jac
self.hess = minim.hess