Skip to content

Commit 9eb2e1d

Browse files
committed
<add>: add housing data regression modeling
1 parent 038b423 commit 9eb2e1d

File tree

2 files changed

+44
-4
lines changed

2 files changed

+44
-4
lines changed

PandasPractice/housing.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import pandas as pd
22
import numpy as np
3+
from sklearn.model_selection import train_test_split
4+
from sklearn.preprocessing import MinMaxScaler
5+
6+
from sklearn.linear_model import LinearRegression
37

48
data = pd.read_csv("./datasets/housing_data.csv", header=None, sep=',') # header=None: no row of the file is used as column names (they are assigned from column_names)
59
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
@@ -27,7 +31,7 @@
2731
# print(data.describe())
2832

2933
# Missing-value handling - drop every row that contains a missing value
30-
# data.dropna() # inplace option 사용할시 변경된 데이터프레임 만들지 않고 대체.
34+
data.dropna(inplace=True) # inplace=True modifies this DataFrame directly instead of returning a new one.
3135

3236
# Outlier handling - criterion: IQR (interquartile range) = q3 - q1
3337
'''
@@ -46,7 +50,34 @@ def get_iqr_outlier_prop(x):
4650
4751
return
4852
'''
49-
print(data['CRIM'].skew())
53+
# print(data['CRIM'].skew())
5054

5155
data['CRIM'] = np.log1p(data['CRIM'])
52-
print(data['CRIM'].skew())
56+
# print(data['CRIM'].skew())
57+
58+
# Drop the 'isHighValue' column; df_r keeps every other column, including MEDV (the regression target used below).
59+
df_r = data.drop(['isHighValue'], axis=1)
60+
61+
# Correlation analysis between variables
62+
cols = ['MEDV', 'LSTAT', 'RM', 'CHAS', 'RAD', 'TAX']
63+
# print(df_r[cols].corr())
64+
65+
# Split the data into training and test sets
66+
X_cols = ['LSTAT', 'PTRATIO', 'TAX', 'AGE', 'NOX', 'INDUS', 'CRIM']
67+
68+
X = df_r[X_cols].values
69+
y = df_r['MEDV'].values
70+
71+
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X, y, test_size=0.3, random_state=123)
72+
73+
# Feature scaling
74+
scaler = MinMaxScaler()
75+
76+
X_train_r_scaled = scaler.fit_transform(X_train_r)
77+
X_test_r_scaled = scaler.transform(X_test_r)  # reuse the min/max learned from the training set; re-fitting on the test set would scale the two sets inconsistently
78+
79+
# Linear regression model
80+
model_lr = LinearRegression()
81+
model_lr.fit(X_train_r_scaled, y_train_r)
82+
print(model_lr.coef_)
83+
print(model_lr.intercept_)

requirements.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,10 @@
1-
numpy~=1.26.4
1+
joblib==1.4.2
2+
numpy==1.26.4
3+
pandas==2.2.2
4+
python-dateutil==2.9.0.post0
5+
pytz==2024.1
6+
scikit-learn==1.5.0
7+
scipy==1.13.1
8+
six==1.16.0
9+
threadpoolctl==3.5.0
10+
tzdata==2024.1

0 commit comments

Comments
 (0)