8
8
9
9
from scipy import sparse as SP
10
10
import numpy as np
11
+ import pandas as pd
11
12
from . import ols as OLS
12
13
from .utils import optim_moments , RegressionPropsY , get_spFilter , spdot , set_warn
13
14
from . import user_output as USER
21
22
22
23
23
24
class BaseGM_KKP (RegressionPropsY ):
25
+
24
26
'''
25
27
Base GMM method for a spatial random effects panel model based on
26
28
Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`.
@@ -68,6 +70,7 @@ class BaseGM_KKP(RegressionPropsY):
68
70
'''
69
71
70
72
def __init__ (self , y , x , w , full_weights = False ):
73
+
71
74
# 1a. OLS --> \tilde{\delta}
72
75
ols = OLS .BaseOLS (y = y , x = x )
73
76
self .x , self .y , self .n , self .k , self .xtx = ols .x , ols .y , ols .n , ols .k , ols .xtx
@@ -115,16 +118,17 @@ def __init__(self, y, x, w, full_weights=False):
115
118
116
119
117
120
class GM_KKP (BaseGM_KKP , REGI .Regimes_Frame ):
121
+
118
122
'''
119
123
GMM method for a spatial random effects panel model based on
120
124
Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`.
121
125
122
126
Parameters
123
127
----------
124
- y : array
128
+ y : array or pandas DataFrame
125
129
n*tx1 or nxt array for dependent variable
126
- x : array
127
- Two dimensional array with n*t rows and k columns for
130
+ x : array or pandas DataFrame
131
+ Two dimensional array or DF with n*t rows and k columns for
128
132
independent (exogenous) variable or n rows and k*t columns
129
133
(note, must not include a constant term)
130
134
w : spatial weights object
@@ -195,59 +199,76 @@ class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
195
199
"""
196
200
Examples
197
201
--------
202
+
198
203
We first need to import the needed modules, namely numpy to convert the
199
204
data we read into arrays that ``spreg`` understands and ``pysal`` to
200
205
perform all the analysis.
206
+
201
207
>>> from spreg import GM_KKP
202
208
>>> import numpy as np
203
209
>>> import libpysal
210
+
204
211
Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open().
205
212
This is the DBF associated with the NAT shapefile. Note that
206
213
libpysal.io.open() also reads data in CSV format; the GM_KKP function requires
207
214
data to be passed in as numpy arrays, hence the user can read their
208
215
data in using any method.
216
+
209
217
>>> nat = libpysal.examples.load_example('NCOVR')
210
218
>>> db = libpysal.io.open(nat.get_path("NAT.dbf"),'r')
219
+
211
220
Extract the HR (homicide rates) data in the 70's, 80's and 90's from the DBF file
212
221
and make it the dependent variable for the regression. Note that the data can also
213
222
be passed in the long format instead of wide format (i.e. a vector with n*t rows
214
223
and a single column for the dependent variable and a matrix of dimension n*txk
215
224
for the independent variables).
225
+
216
226
>>> name_y = ['HR70','HR80','HR90']
217
227
>>> y = np.array([db.by_col(name) for name in name_y]).T
228
+
218
229
Extract RD and PS in the same time periods from the DBF to be used as
219
230
independent variables in the regression. Note that PySAL requires this to
220
231
be an nxk*t numpy array, where k is the number of independent variables (not
221
232
including a constant) and t is the number of time periods. Data must be
222
233
organized in a way that all time periods of a given variable are side-by-side
223
234
and in the correct time order.
224
235
By default a vector of ones will be added to the independent variables passed in.
236
+
225
237
>>> name_x = ['RD70','RD80','RD90','PS70','PS80','PS90']
226
238
>>> x = np.array([db.by_col(name) for name in name_x]).T
239
+
227
240
Since we want to run a spatial error panel model, we need to specify the spatial
228
241
weights matrix that includes the spatial configuration of the observations
229
242
into the error component of the model. To do that, we can open an already
230
243
existing gal file or create a new one. In this case, we will create one
231
244
from ``NAT.shp``.
245
+
232
246
>>> w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("NAT.shp"))
247
+
233
248
Unless there is a good reason not to do it, the weights have to be
234
249
row-standardized so every row of the matrix sums to one. Among other
235
250
things, this allows us to interpret the spatial lag of a variable as the
236
251
average value of the neighboring observations. In PySAL, this can be
237
252
easily performed in the following way:
253
+
238
254
>>> w.transform = 'r'
255
+
239
256
We are all set with the preliminaries, we are good to run the model. In this
240
257
case, we will need the variables and the weights matrix. If we want to
241
258
have the names of the variables printed in the output summary, we will
242
259
have to pass them in as well, although this is optional. In this example
243
260
we set full_weights to False (the default), indicating that we will use
244
261
only 2 sets of moments weights for the first 3 and the last 3 moment conditions.
262
+
245
263
>>> reg = GM_KKP(y,x,w,full_weights=False,name_y=name_y, name_x=name_x)
264
+
246
265
Warning: Assuming time data is in wide format, i.e. y[0] refers to T0, y[1], refers to T1, etc.
247
266
Similarly, assuming x[0:k] refers to independent variables for T0, x[k+1:2k] refers to T1, etc.
267
+
248
268
Once we have run the model, we can explore a little bit the output. We can
249
269
either request a printout of the results with the command print(reg.summary) or
250
270
check out the individual attributes of GM_KKP:
271
+
251
272
>>> print(reg.summary)
252
273
REGRESSION
253
274
----------
@@ -271,18 +292,23 @@ class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
271
292
sigma2_1 39.9099323
272
293
------------------------------------------------------------------------------------
273
294
================================ END OF REPORT =====================================
295
+
274
296
>>> print(reg.name_x)
275
297
['CONSTANT', 'RD', 'PS', 'lambda', ' sigma2_v', 'sigma2_1']
298
+
276
299
The attribute reg.betas contains all the coefficients: betas, the spatial error
277
300
coefficient lambda, sig2_v and sig2_1:
301
+
278
302
>>> print(np.around(reg.betas,4))
279
303
[[ 6.4922]
280
304
[ 3.6245]
281
305
[ 1.3119]
282
306
[ 0.4178]
283
307
[22.8191]
284
308
[39.9099]]
309
+
285
310
Finally, we can check the standard errors of the betas:
311
+
286
312
>>> print(np.around(np.sqrt(reg.vm.diagonal().reshape(3,1)),4))
287
313
[[0.1127]
288
314
[0.0877]
@@ -458,6 +484,22 @@ def _get_panel_data(y, x, w, name_y, name_x):
458
484
Names of independent variables for use in output
459
485
"""
460
486
487
+ if isinstance (y , (pd .Series , pd .DataFrame )):
488
+ if name_y is None :
489
+ try :
490
+ name_y = y .columns .to_list ()
491
+ except AttributeError :
492
+ name_y = y .name
493
+ y = y .to_numpy ()
494
+
495
+ if isinstance (x , (pd .Series , pd .DataFrame )):
496
+ if name_x is None :
497
+ try :
498
+ name_x = x .columns .to_list ()
499
+ except AttributeError :
500
+ name_x = x .name
501
+ x = x .to_numpy ()
502
+
461
503
if y .shape [0 ] / w .n != y .shape [0 ] // w .n :
462
504
raise Exception ("y must be ntx1 or nxt, and w must be an nxn PySAL W object." )
463
505
N , T = y .shape [0 ], y .shape [1 ]
0 commit comments