Skip to content

Commit d68f3aa

Browse files
authored
Merge pull request #171 from pedrovma/main
Release 1.8.2
2 parents 3abfa09 + 101dda3 commit d68f3aa

33 files changed

+2821
-3394
lines changed

docs/api.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ These are the standard spatial regression models supported by the `spreg` packag
2626

2727
spreg.GM_Lag
2828
spreg.ML_Lag
29-
spreg.GMM_Error
3029
spreg.ML_Error
30+
spreg.GMM_Error
3131
spreg.GM_Error
3232
spreg.GM_Error_Het
3333
spreg.GM_Error_Hom
@@ -141,6 +141,7 @@ Diagnostic tests are useful for identifying model fit, sufficiency, and specific
141141
spreg.panel_rLMerror
142142
spreg.panel_Hausman
143143
spreg.sputils.spmultiplier
144+
spreg.diagnostics_probit.sp_tests
144145

145146

146147
Spatial Specification Search
@@ -184,4 +185,4 @@ Tools for simulating synthetic data according to data-generating processes impli
184185
spreg.dgp.dgp_probit
185186
spreg.dgp.make_bin
186187
spreg.dgp.make_heterror
187-
spreg.dgp.make_vmult
188+
spreg.dgp.make_vmult

spreg/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .diagnostics_sp import *
99
from .diagnostics_sur import *
1010
from .diagnostics_tsls import *
11+
from .diagnostics_probit import *
1112
from .error_sp import *
1213
from .error_sp_het import *
1314
from .error_sp_het_regimes import *

spreg/diagnostics_probit.py

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
"""
2+
Diagnostics in probit regression.
3+
4+
"""
5+
__author__ = (
6+
"Luc Anselin [email protected], Pedro Amaral [email protected] "
7+
)
8+
9+
from math import sqrt, pi
10+
11+
from libpysal.common import MISSINGVALUE
12+
import numpy as np
13+
import numpy.linalg as la
14+
import scipy.sparse as SP
15+
from scipy import stats
16+
from scipy.stats import norm
17+
18+
__all__ = [
19+
"pred_table",
20+
"probit_fit",
21+
"probit_lrtest",
22+
"mcfad_rho",
23+
"probit_ape",
24+
"sp_tests",
25+
"moran_KP",
26+
]
27+
28+
29+
def pred_table(reg):
30+
"""
31+
Calculates a table comparing predicted to actual outcomes for a
32+
discrete choice model
33+
34+
Parameters
35+
----------
36+
reg : regression object
37+
output instance from a probit regression model
38+
39+
Returns
40+
----------
41+
predtab_vals : dictionary
42+
includes margins and cells of actual and predicted
43+
values for discrete choice model
44+
actpos : observed positives (=1)
45+
actneg : observed negatives (=0)
46+
predpos : predicted positives
47+
predneg : predicted negatives
48+
truepos : predicted 1 when actual = 1
49+
falsepos : predicted 1 when actual = 0
50+
trueneg : predicted 0 when actual = 0
51+
falseneg : predicted 0 when actual = 1
52+
53+
"""
54+
predtab_vals = {}
55+
pos = reg.y.sum()
56+
predtab_vals["actpos"] = int(pos)
57+
neg = reg.n - pos
58+
predtab_vals["actneg"] = int(neg)
59+
act1 = (reg.y == 1) * 1
60+
act0 = (reg.y == 0) * 1
61+
ppos = reg.predybin.sum()
62+
predtab_vals["predpos"] = ppos
63+
pneg = reg.n - ppos
64+
predtab_vals["predneg"] = pneg
65+
pred1 = (reg.predybin == 1) * 1
66+
pred0 = (reg.predybin == 0) * 1
67+
truep = (pred1 * act1) * 1
68+
predtab_vals["truepos"] = truep.sum()
69+
truen = (pred0 * act0) * 1
70+
predtab_vals["trueneg"] = truen.sum()
71+
fpos = (pred1 * act0) * 1
72+
predtab_vals["falsepos"] = fpos.sum()
73+
fneg = (pred0 * act1) * 1
74+
predtab_vals["falseneg"] = fneg.sum()
75+
76+
return predtab_vals
77+
78+
79+
def probit_fit(reg):
80+
"""
81+
Various measures of fit for discrete choice models, derived from the
82+
prediction table (pred_table)
83+
84+
Parameters
85+
----------
86+
reg : regression object
87+
output instance from a probit regression model
88+
must contain predtable attribute
89+
90+
Returns
91+
----------
92+
prob_fit : a dictionary containing various measures of fit
93+
TPR : true positive rate (sensitivity, recall, hit rate)
94+
TNR : true negative rate (specificity, selectivity)
95+
PREDPC : accuracy, percent correctly predicted
96+
BA : balanced accuracy
97+
98+
"""
99+
100+
prob_fit = {}
101+
prob_fit["TPR"] = 100.0 * reg.predtable["truepos"] / reg.predtable["actpos"]
102+
prob_fit["TNR"] = 100.0 * reg.predtable["trueneg"] / reg.predtable["actneg"]
103+
prob_fit["BA"] = (prob_fit["TPR"] + prob_fit["TNR"])/2.0
104+
prob_fit["PREDPC"] = 100.0 * (reg.predtable["truepos"] + reg.predtable["trueneg"]) / reg.n
105+
106+
return prob_fit
107+
108+
def probit_lrtest(regprob):
109+
"""
110+
Likelihood ratio test statistic for probit model
111+
112+
Parameters
113+
----------
114+
regprob : probit regression object
115+
116+
Returns
117+
-------
118+
119+
likratio : dictionary
120+
contains the statistic for the null model (L0), the LR test(likr),
121+
the degrees of freedom (df) and the p-value (pvalue)
122+
L0 : float
123+
log likelihood of null model
124+
likr : float
125+
likelihood ratio statistic
126+
df : integer
127+
degrees of freedom
128+
p-value : float
129+
p-value
130+
"""
131+
132+
likratio = {}
133+
P = np.mean(regprob.y)
134+
L0 = regprob.n * (P * np.log(P) + (1 - P) * np.log(1 - P))
135+
likratio["L0"] = L0
136+
LR = -2.0 * (L0 - regprob.logl)
137+
likratio["likr"] = LR
138+
likratio["df"] = regprob.k
139+
pval = stats.chisqprob(LR, regprob.k)
140+
likratio["p-value"] = pval
141+
142+
return likratio
143+
144+
def mcfad_rho(regprob):
145+
"""
146+
McFadden's rho measure of fit
147+
148+
Parameters
149+
---------
150+
regprob : probit regression object
151+
152+
Returns
153+
-------
154+
rho : McFadden's rho (1 - L/L0)
155+
156+
"""
157+
158+
rho = 1.0 - (regprob.logl / regprob.L0)
159+
return rho
160+
161+
def probit_ape(regprob):
162+
"""
163+
Average partial effects
164+
165+
Parameters
166+
----------
167+
regprob : probit regression object
168+
169+
Returns
170+
-------
171+
tuple with:
172+
scale : the scale of the marginal effects, determined by regprob.scalem
173+
Default: 'phimean' (Mean of individual marginal effects)
174+
Alternative: 'xmean' (Marginal effects at variables mean)
175+
slopes : marginal effects or average partial effects (not for constant)
176+
slopes_vm : estimates of variance of marginal effects (not for constant)
177+
slopes_std_err : estimates of standard errors of marginal effects
178+
slopes_z_stat : tuple with z-statistics and p-values for marginal effects
179+
180+
"""
181+
182+
183+
if regprob.scalem == "xmean":
184+
xmb = regprob.xmean.T @ regprob.betas
185+
scale = stats.norm.pdf(xmb)
186+
187+
elif regprob.scalem == "phimean":
188+
scale = np.mean(regprob.phiy,axis=0)
189+
190+
# average partial effects (no constant)
191+
slopes = (regprob.betas[1:,0] * scale).reshape(-1,1)
192+
193+
# variance of partial effects
194+
xmb = regprob.xmean.T @ regprob.betas
195+
bxt = regprob.betas @ regprob.xmean.T
196+
dfdb = np.eye(regprob.k) - xmb * bxt
197+
slopes_vm = (scale ** 2) * ((dfdb @ regprob.vm) @ dfdb.T)
198+
199+
# standard errors
200+
slopes_std_err = np.sqrt(slopes_vm[1:,1:].diagonal()).reshape(-1,1)
201+
202+
# z-stats and p-values
203+
sl_zStat = slopes / slopes_std_err
204+
slopes_z_stat = [(sl_zStat[i,0],stats.norm.sf(abs(sl_zStat[i,0])) * 2) for i in range(len(slopes))]
205+
206+
207+
return (scale, slopes,slopes_vm[1:,1:],slopes_std_err,slopes_z_stat)
208+
209+
210+
def sp_tests(regprob=None, obj_list=None):
211+
"""
212+
Calculates tests for spatial dependence in Probit models
213+
214+
Parameters
215+
----------
216+
regprob : regression object from spreg
217+
output instance from a probit model
218+
obj_list : list
219+
list of regression elements from both libpysal and statsmodels' ProbitResults
220+
The list should be such as:
221+
[libpysal.weights, ProbitResults.fittedvalues, ProbitResults.resid_response, ProbitResults.resid_generalized]
222+
223+
Returns
224+
-------
225+
tuple with LM_Err, moran, ps as 2x1 arrays with statistic and p-value
226+
LM_Err: Pinkse
227+
moran : Kelejian-Prucha generalized Moran
228+
ps : Pinkse-Slade
229+
230+
Examples
231+
--------
232+
The results of this function will be automatically added to the output of the probit model if using spreg.
233+
If using the Probit estimator from statsmodels, the user can call the function with the obj_list argument.
234+
The argument obj_list should be a list with the following elements, in this order:
235+
[libpysal.weights, ProbitResults.fittedvalues, ProbitResults.resid_response, ProbitResults.resid_generalized]
236+
The function will then return and print the results of the spatial diagnostics.
237+
238+
>>> import libpysal
239+
>>> import statsmodels.api as sm
240+
>>> import geopandas as gpd
241+
>>> from spreg.diagnostics_probit import sp_tests
242+
243+
>>> columb = libpysal.examples.load_example('Columbus')
244+
>>> dfs = gpd.read_file(columb.get_path("columbus.shp"))
245+
>>> w = libpysal.weights.Queen.from_dataframe(dfs)
246+
>>> w.transform='r'
247+
248+
>>> y = (dfs["CRIME"] > 40).astype(float)
249+
>>> X = dfs[["INC","HOVAL"]]
250+
>>> X = sm.add_constant(X)
251+
252+
>>> probit_mod = sm.Probit(y, X)
253+
>>> probit_res = probit_mod.fit(disp=False)
254+
>>> LM_err, moran, ps = sp_tests(obj_list=[w, probit_res.fittedvalues, probit_res.resid_response, probit_res.resid_generalized])
255+
PROBIT MODEL DIAGNOSTICS FOR SPATIAL DEPENDENCE
256+
TEST DF VALUE PROB
257+
Kelejian-Prucha (error) 1 1.721 0.0852
258+
Pinkse (error) 1 3.132 0.0768
259+
Pinkse-Slade (error) 1 2.558 0.1097
260+
261+
"""
262+
if regprob:
263+
w, Phi, phi, u_naive, u_gen, n = regprob.w, regprob.predy, regprob.phiy, regprob.u_naive, regprob.u_gen, regprob.n
264+
elif obj_list:
265+
w, fittedvalues, u_naive, u_gen = obj_list
266+
Phi = norm.cdf(fittedvalues)
267+
phi = norm.pdf(fittedvalues)
268+
n = w.n
269+
270+
try:
271+
w = w.sparse
272+
except:
273+
w = w
274+
275+
# Pinkse_error:
276+
Phi_prod = Phi * (1 - Phi)
277+
sig2 = np.sum((phi * phi) / Phi_prod) / n
278+
LM_err_num = np.dot(u_gen.T, (w * u_gen)) ** 2
279+
trWW = np.sum((w * w).diagonal())
280+
trWWWWp = trWW + np.sum((w * w.T).diagonal())
281+
LM_err = float(1.0 * LM_err_num / (sig2 ** 2 * trWWWWp))
282+
LM_err = np.array([LM_err, stats.chisqprob(LM_err, 1)])
283+
# KP_error:
284+
moran = moran_KP(w, u_naive, Phi_prod)
285+
# Pinkse-Slade_error:
286+
u_std = u_naive / np.sqrt(Phi_prod)
287+
ps_num = np.dot(u_std.T, (w * u_std)) ** 2
288+
trWpW = np.sum((w.T * w).diagonal())
289+
ps = float(ps_num / (trWW + trWpW))
290+
# chi-square instead of bootstrap.
291+
ps = np.array([ps, stats.chisqprob(ps, 1)])
292+
293+
if obj_list:
294+
from .output import _probit_out
295+
reg_simile = type('reg_simile', (object,), {})()
296+
reg_simile.Pinkse_error = LM_err
297+
reg_simile.KP_error = moran
298+
reg_simile.PS_error = ps
299+
print("PROBIT MODEL "+_probit_out(reg_simile, spat_diag=True, sptests_only=True)[1:])
300+
301+
return LM_err, moran, ps
302+
303+
def moran_KP(w, u, sig2i):
304+
"""
305+
Calculates Kelejian-Prucha Moran-flavoured tests
306+
307+
Parameters
308+
----------
309+
310+
w : W
311+
PySAL weights instance aligned with y
312+
u : array
313+
nx1 array of naive residuals
314+
sig2i : array
315+
nx1 array of individual variance
316+
317+
Returns
318+
-------
319+
moran : array, Kelejian-Prucha Moran's I with p-value
320+
"""
321+
try:
322+
w = w.sparse
323+
except:
324+
pass
325+
moran_num = np.dot(u.T, (w * u))
326+
E = SP.lil_matrix(w.get_shape())
327+
E.setdiag(sig2i.flat)
328+
E = E.asformat("csr")
329+
WE = w * E
330+
moran_den = np.sqrt(np.sum((WE * WE + (w.T * E) * WE).diagonal()))
331+
moran = float(1.0 * moran_num / moran_den)
332+
moran = np.array([moran, stats.norm.sf(abs(moran)) * 2.0])
333+
return moran
334+
335+
336+
def _test():
337+
import doctest
338+
339+
doctest.testmod()
340+
341+
342+
if __name__ == "__main__":
343+
_test()

0 commit comments

Comments
 (0)