
Commit 3abfa09

Merge pull request #170 from pedrovma/main
Fixing GM_KKP in the presence of pandas DF
2 parents 3ff33bc + 3b2fc63 commit 3abfa09


3 files changed: 49 additions & 6 deletions


docs/api.rst

Lines changed: 1 addition & 0 deletions
@@ -140,6 +140,7 @@ Diagnostic tests are useful for identifying model fit, sufficiency, and specific
     spreg.panel_rLMlag
     spreg.panel_rLMerror
     spreg.panel_Hausman
+    spreg.sputils.spmultiplier


 Spatial Specification Search

spreg/dgp.py

Lines changed: 3 additions & 3 deletions
@@ -909,13 +909,13 @@ def make_bin(yy):

     >>> import numpy as np
     >>> import libpysal
-    >>> from spreg import make_x, dgp_ols, dgp_pbit
+    >>> from spreg import make_x, dgp_ols, make_bin
     >>> rng = np.random.default_rng(12345)
     >>> u = make_x(rng,25,mu=[0],varu=[1], method='normal')
     >>> x = make_x(rng,25,mu=[0],varu=[1])
     >>> xb = make_xb(x,[1,2])
     >>> yy = dgp_ols(u,xb)
-    >>> dgp_pbit(yy)[0:5,:]
+    >>> make_bin(yy)[0:5,:]
     array([[1],
            [0],
            [0],
@@ -925,7 +925,7 @@ def make_bin(yy):
     """
     mm = yy.mean()
     y = (yy > mm)
-    return y
+    return y * 1


 def make_heterror(u,v):
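
The return y * 1 change makes make_bin hand back a 0/1 integer array instead of a raw boolean array, because multiplying a NumPy boolean array by 1 casts it to integers. A minimal standalone sketch of that idiom (illustrative values only, not taken from the repository):

    import numpy as np

    yy = np.array([[2.3], [0.1], [1.7], [0.4], [3.0]])
    mm = yy.mean()        # threshold at the mean, mirroring make_bin
    y_bool = yy > mm      # boolean column vector: True where yy exceeds the mean
    y_int = y_bool * 1    # multiplying by 1 casts the booleans to 0/1 integers
    print(y_int.T)        # [[1 0 1 0 1]]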

spreg/sp_panels.py

Lines changed: 45 additions & 3 deletions
@@ -8,6 +8,7 @@

 from scipy import sparse as SP
 import numpy as np
+import pandas as pd
 from . import ols as OLS
 from .utils import optim_moments, RegressionPropsY, get_spFilter, spdot, set_warn
 from . import user_output as USER
@@ -21,6 +22,7 @@


 class BaseGM_KKP(RegressionPropsY):
+
     '''
     Base GMM method for a spatial random effects panel model based on
     Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`.
@@ -68,6 +70,7 @@ class BaseGM_KKP(RegressionPropsY):
     '''

     def __init__(self, y, x, w, full_weights=False):
+
         # 1a. OLS --> \tilde{\delta}
         ols = OLS.BaseOLS(y=y, x=x)
         self.x, self.y, self.n, self.k, self.xtx = ols.x, ols.y, ols.n, ols.k, ols.xtx
@@ -115,16 +118,17 @@ def __init__(self, y, x, w, full_weights=False):


 class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
+
     '''
     GMM method for a spatial random effects panel model based on
     Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`.

     Parameters
     ----------
-    y          : array
+    y          : array or pandas DataFrame
                  n*tx1 or nxt array for dependent variable
-    x          : array
-                 Two dimensional array with n*t rows and k columns for
+    x          : array or pandas DataFrame
+                 Two dimensional array or DF with n*t rows and k columns for
                  independent (exogenous) variable or n rows and k*t columns
                  (note, must not include a constant term)
     w          : spatial weights object
@@ -195,59 +199,76 @@ class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
     """
     Examples
     --------
+
     We first need to import the needed modules, namely numpy to convert the
     data we read into arrays that ``spreg`` understands and ``pysal`` to
     perform all the analysis.
+
     >>> from spreg import GM_KKP
     >>> import numpy as np
     >>> import libpysal
+
     Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open().
     This is the DBF associated with the NAT shapefile. Note that
     libpysal.io.open() also reads data in CSV format; The GM_KKP function requires
     data to be passed in as numpy arrays, hence the user can read their
     data in using any method.
+
     >>> nat = libpysal.examples.load_example('NCOVR')
     >>> db = libpysal.io.open(nat.get_path("NAT.dbf"),'r')
+
     Extract the HR (homicide rates) data in the 70's, 80's and 90's from the DBF file
     and make it the dependent variable for the regression. Note that the data can also
     be passed in the long format instead of wide format (i.e. a vector with n*t rows
     and a single column for the dependent variable and a matrix of dimension n*txk
     for the independent variables).
+
     >>> name_y = ['HR70','HR80','HR90']
     >>> y = np.array([db.by_col(name) for name in name_y]).T
+
     Extract RD and PS in the same time periods from the DBF to be used as
     independent variables in the regression. Note that PySAL requires this to
     be an nxk*t numpy array, where k is the number of independent variables (not
     including a constant) and t is the number of time periods. Data must be
     organized in a way that all time periods of a given variable are side-by-side
     and in the correct time order.
     By default a vector of ones will be added to the independent variables passed in.
+
     >>> name_x = ['RD70','RD80','RD90','PS70','PS80','PS90']
     >>> x = np.array([db.by_col(name) for name in name_x]).T
+
     Since we want to run a spatial error panel model, we need to specify the spatial
     weights matrix that includes the spatial configuration of the observations
     into the error component of the model. To do that, we can open an already
     existing gal file or create a new one. In this case, we will create one
     from ``NAT.shp``.
+
     >>> w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("NAT.shp"))
+
     Unless there is a good reason not to do it, the weights have to be
     row-standardized so every row of the matrix sums to one. Among other
     things, his allows to interpret the spatial lag of a variable as the
     average value of the neighboring observations. In PySAL, this can be
     easily performed in the following way:
+
     >>> w.transform = 'r'
+
     We are all set with the preliminaries, we are good to run the model. In this
     case, we will need the variables and the weights matrix. If we want to
     have the names of the variables printed in the output summary, we will
     have to pass them in as well, although this is optional. In this example
     we set full_weights to False (the default), indicating that we will use
     only 2 sets of moments weights for the first 3 and the last 3 moment conditions.
+
     >>> reg = GM_KKP(y,x,w,full_weights=False,name_y=name_y, name_x=name_x)
+
     Warning: Assuming time data is in wide format, i.e. y[0] refers to T0, y[1], refers to T1, etc.
     Similarly, assuming x[0:k] refers to independent variables for T0, x[k+1:2k] refers to T1, etc.
+
     Once we have run the model, we can explore a little bit the output. We can
     either request a printout of the results with the command print(reg.summary) or
     check out the individual attributes of GM_KKP:
+
     >>> print(reg.summary)
     REGRESSION
     ----------
@@ -271,18 +292,23 @@ class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
     sigma2_1 39.9099323
     ------------------------------------------------------------------------------------
     ================================ END OF REPORT =====================================
+
     >>> print(reg.name_x)
     ['CONSTANT', 'RD', 'PS', 'lambda', ' sigma2_v', 'sigma2_1']
+
     The attribute reg.betas contains all the coefficients: betas, the spatial error
     coefficient lambda, sig2_v and sig2_1:
+
     >>> print(np.around(reg.betas,4))
     [[ 6.4922]
      [ 3.6245]
      [ 1.3119]
      [ 0.4178]
      [22.8191]
      [39.9099]]
+
     Finally, we can check the standard erros of the betas:
+
     >>> print(np.around(np.sqrt(reg.vm.diagonal().reshape(3,1)),4))
     [[0.1127]
      [0.0877]
@@ -458,6 +484,22 @@ def _get_panel_data(y, x, w, name_y, name_x):
                  Names of independent variables for use in output
     """

+    if isinstance(y, (pd.Series, pd.DataFrame)):
+        if name_y is None:
+            try:
+                name_y = y.columns.to_list()
+            except AttributeError:
+                name_y = y.name
+        y = y.to_numpy()
+
+    if isinstance(x, (pd.Series, pd.DataFrame)):
+        if name_x is None:
+            try:
+                name_x = x.columns.to_list()
+            except AttributeError:
+                name_x = x.name
+        x = x.to_numpy()
+
     if y.shape[0] / w.n != y.shape[0] // w.n:
         raise Exception("y must be ntx1 or nxt, and w must be an nxn PySAL W object.")
     N, T = y.shape[0], y.shape[1]
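
The new block at the top of _get_panel_data is what makes GM_KKP work with pandas input: when y or x arrives as a Series/DataFrame and no names were supplied, the column names (or the Series name) are used as variable names and the values are converted with .to_numpy() before the existing shape checks run. A hedged usage sketch of what this enables, reusing the NAT example from the docstring above and assuming GM_KKP's usual defaults of name_y=None and name_x=None (the DataFrame construction itself is illustrative, not part of the commit):

    import pandas as pd
    import libpysal
    from spreg import GM_KKP

    # Load the NAT example data, as in the GM_KKP docstring above.
    nat = libpysal.examples.load_example('NCOVR')
    db = libpysal.io.open(nat.get_path("NAT.dbf"), 'r')

    # Wide-format pandas DataFrames: one column per time period.
    y_df = pd.DataFrame({name: db.by_col(name) for name in ['HR70', 'HR80', 'HR90']})
    x_df = pd.DataFrame({name: db.by_col(name)
                         for name in ['RD70', 'RD80', 'RD90', 'PS70', 'PS80', 'PS90']})

    w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("NAT.shp"))
    w.transform = 'r'

    # With no name_y/name_x passed, the variable names are taken from the
    # DataFrame columns and the values are converted to numpy arrays internally.
    reg = GM_KKP(y_df, x_df, w, full_weights=False)
    print(reg.summary)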
