Commit 644b62aa by Tuccillo, Joe

### migrate files and update README

parent 81d571f0
 # pymedm Penalized Maximum-Entropy Dasymetric Modeling (P-MEDM) in Python \ No newline at end of file Penalized Maximum-Entropy Dasymetric Modeling (P-MEDM) in Python This is a _Work in Progress_ Python port of [PMEDMrcpp](https://bitbucket.org/nnnagle/pmedmrcpp/src/master). #### References 1. Leyk, S., Nagle, N. N., & Buttenfield, B. P. (2013). Maximum entropy dasymetric modeling for demographic small area estimation. Geographical Analysis, 45(3), 285-306. 2. Nagle, N. N., Buttenfield, B. P., Leyk, S., & Spielman, S. (2014). Dasymetric modeling and uncertainty. Annals of the Association of American Geographers, 104(1), 80-95.
code/pmedm.py 0 → 100644
 #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sun Aug 29 13:46:41 2021 @author: joe """ import numpy as np import pandas as pd from scipy import sparse from scipy.optimize import minimize class pmedm: """P-MEDM class. Attributes: serial: Response IDs. wt: Sample weights. N: Total population. n: Total responses. pX: Individual-level constraints. Y1: Level-1 (aggregate) constraints. Y2: Level-2 (target) constraints. Y_vec: Vectorized constraints. V1: Level-1 (aggregate) error variances. V2: Level-2 (target) error variances. V_vec: Vectorized error variances. sV: Diagonal matrix of error variances. topo: Geographic crosswalk. A1: Geographic crosswalk (np-array). A2: Level-2 (target) identity matrix. X: Solution space. q: Occurrence probabilities. lam: Parameters. """ def __init__(self, serial, wt, cind, cg1, cg2, sg1, sg2, q = None,\ lam = None, verbose = True, keep_solver = True): """ Parameters ---------- serial : TYPE DESCRIPTION. wt : TYPE DESCRIPTION. cind : TYPE DESCRIPTION. cg1 : TYPE DESCRIPTION. cg2 : TYPE DESCRIPTION. sg1 : TYPE DESCRIPTION. sg2 : TYPE DESCRIPTION. q : TYPE, optional DESCRIPTION. The default is None. lam : TYPE, optional DESCRIPTION. The default is None. Returns ------- None. """ # Response IDs self.serial = serial # Sample weights self.wt = wt # Total population self.N = np.sum(self.wt) # Total responses self.n = cind.shape[0] # Individual constraints self.pX = cind.values # Geographic constraints self.Y1 = cg1.values self.Y2 = cg2.values self.Y_vec = np.concatenate([self.Y1.flatten('A'), self.Y2.flatten('A')]) / self.N # Geographic constraint error variances self.V1 = np.square(sg1.astype('float').values) self.V2 = np.square(sg2.astype('float').values) self.V_vec = np.concatenate([self.V1.flatten('A'), self.V2.flatten('A')])\ * (self.n / self.N**2) self.sV = np.diag(self.V_vec) # Geographic topology self.topo = pd.DataFrame({'g1': cg2.index.values,\ 'g2': [str(i)[0] for i in cg2.index.values]}) self.A1 = np.array([1 * (self.topo.g2.values == G)\ for G in np.unique(self.topo.g2.values)]) self.A2 = np.identity(self.topo.shape[0]) # Solution space X1 = np.kron(np.transpose(self.pX), self.A1) X2 = np.kron(np.transpose(self.pX), self.A2) self.X = np.transpose(np.vstack((X1, X2))) # Initial probabilities if q is None: self.q = np.repeat(wt, self.A1.shape[1], axis = 0) self.q = self.q / np.sum(self.q) # Initial parameters if lam is None: self.lam = np.zeros((len(self.Y_vec),)) # solver gradient self.grad = None # solver hessian self.hess = None # for storing solver results self.res = None # print output? self.verbose = verbose # keep P-MEDM solver results? self.keep_solver = keep_solver @staticmethod def f(lam, q, X, Y_vec, sV): """Objective function """ qXl = q * np.exp(np.matmul(-X, lam)) lvl = lam.dot(np.matmul(sV, lam)) F = Y_vec.dot(lam) + np.log(np.sum(qXl)) + (0.5 * lvl) return F @staticmethod def g(lam, q, X, Y_vec, sV): """Gradient function """ qXl = q * np.exp(np.matmul(-X, lam)) p = qXl / np.sum(qXl) Xp = np.matmul(np.transpose(X), p) G = Y_vec + np.matmul(sV, lam) - Xp return G @staticmethod def h(lam, q, X, Y_vec, sV): """Hessian function """ qXl = q * np.exp(np.matmul(-X, lam)) p = qXl / np.sum(qXl) # first term h1a = np.matmul(np.transpose(X), p) h1b = p.dot(X) h1 = h1a[:,None] * h1b # second term dp = sparse.csr_matrix(np.diag(p)) h2 = np.matmul(X.transpose(), dp.dot(X)) H = -h1 + h2 + sV return H def update_probabilities(self, lam): """Updates occurrence probabilities """ qXl = self.q * np.exp(np.matmul(-self.X, lam)) self.q = qXl / np.sum(qXl) def solve(self): minim = minimize(method = 'trust-krylov',\ fun = self.f, x0 = self.lam, jac = self.g, hess = self.h,\ args=(self.q, self.X, self.Y_vec, self.sV),\ options = {'disp' : self.verbose, 'return_all': True}) # updates self.lam = minim.x self.update_probabilities(self.lam) self.grad = minim.jac self.hess = minim.hess if self.keep_solver: self.res = minim \ No newline at end of file
code/test.py 0 → 100644
 #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Wed Sep 1 07:28:57 2021 @author: joe """ #%% # Total population N = np.sum(wt) #%% #%% # Total responses n = cind.shape[0] #%% #%% # Individual constraints pX = cind.values #%% #%% # Geographic constraints Y1 = ctrt.values Y2 = cbg.values Y_vec = np.concatenate([Y1.flatten('A'), Y2.flatten('A')]) / N #%% #%% # Geographic constraint error variances V1 = np.square(strt.astype('float').values) V2 = np.square(sbg.astype('float').values) V_vec = np.concatenate([V1.flatten('A'), V2.flatten('A')])\ * (n / N**2) #%% #%% sV = np.diag(V_vec) #%% #%% # Geographic topology topo = pd.DataFrame({'g2': cbg.index.values,\ 'g1': [str(i)[0] for i in cbg.index.values]}) #%% #%% A1 = np.array([1 * (topo.g1.values == G)\ for G in np.unique(topo.g1.values)]) #%% #%% A2 = np.identity(topo.shape[0]) #%% # Solution space X1 = np.kron(np.transpose(pX), A1) X2 = np.kron(np.transpose(pX), A2) X = np.transpose(np.vstack((X1, X2))) # Prior probabilities if q is None: q = np.repeat(wt, A1.shape[1], axis = 0) q = q / np.sum(q) # Initial parameters if lam is None: lam = np.zeros((len(Y_vec),)) \ No newline at end of file
 "","GEOID","POP","CONST1","CONST2","CONST3","POPs","CONST1s","CONST2s","CONST3s" "1","10",344,152,101,175,9.4339811320566,7.81024967590665,12.328828005938,11.1803398874989 "2","20",260,84,106,107,4.24264068711928,7.61577310586391,8.24621125123532,7.81024967590665 "3","30",682,200,242,293,8.36660026534076,12.4899959967968,14.3527000944073,10
 GEOID,POP,POPs,CONST1,CONST1s,CONST2,CONST2s,CONST3,CONST3s 101,112,6,78,6,12,6,23,3 102,122,7,47,3,18,4,94,4 103,110,2,27,4,71,10,58,10 201,110,3,53,3,10,8,67,5 202,150,3,31,7,96,2,40,6 301,136,4,49,6,28,8,58,5 302,144,3,13,6,42,4,74,3 303,148,2,13,2,61,9,31,5 304,122,5,81,8,33,6,52,5 305,132,4,44,4,78,3,78,4