Skip to content

Commit 7752e05

Browse files
committed
<add>pd practice script
1 parent 9eb2e1d commit 7752e05

File tree

4 files changed

+69
-2
lines changed

4 files changed

+69
-2
lines changed

PandasPractice/housing.py renamed to PandasPractice/housing/housing.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
from sklearn.preprocessing import MinMaxScaler
55

66
from sklearn.linear_model import LinearRegression
7+
from sklearn.svm import SVR
8+
from sklearn.ensemble import RandomForestRegressor
79

8-
data = pd.read_csv("./datasets/housing_data.csv", header=None, sep=',') # if no columns , header = None
10+
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
11+
12+
data = pd.read_csv("datasets/housing_data.csv", header=None, sep=',') # if no columns , header = None
913
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
1014
'MEDV', 'isHighValue']
1115

@@ -23,7 +27,6 @@
2327
med_val = data['CRIM'].median()
2428
data1['CRIM'] = data1['CRIM'].fillna(med_val)
2529

26-
2730
# print(data1.describe())
2831

2932
# 결측치 처리 - CRIM column의 행들 중 결측치가 존재하는 행들 제거
@@ -81,3 +84,29 @@ def get_iqr_outlier_prop(x):
8184
model_lr.fit(X_train_r_scaled, y_train_r)
8285
print(model_lr.coef_)
8386
print(model_lr.intercept_)
87+
88+
# SVM: support-vector regressor built with scikit-learn's default arguments,
# fitted on the same scaled training split as the linear model.
model_svr = SVR()
model_svr.fit(X_train_r_scaled, y_train_r)

# Random forest: random_state is pinned so repeated runs are reproducible.
model_rfr = RandomForestRegressor(random_state=123)
model_rfr.fit(X_train_r_scaled, y_train_r)

# Print each feature's importance next to its label, to three decimals.
# NOTE(review): X_cols is presumably the list of feature column names in the
# same order as the columns of the training matrix - confirm where it is set.
for x, val in zip(X_cols, model_rfr.feature_importances_):
    # One f-string format spec instead of mixing f-string and %-formatting,
    # so a '%' inside a column name cannot break the format.
    print(f'{x} : {val:.3f}')
97+
98+
# Per-model evaluation: predict on the scaled test split with each fitted model.
y_pred_lr = model_lr.predict(X_test_r_scaled)
y_pred_svr = model_svr.predict(X_test_r_scaled)
y_pred_rfr = model_rfr.predict(X_test_r_scaled)

# Linear-regression report. Every sklearn metric used here takes
# (y_true, y_pred); calling them with no arguments raises TypeError.
# NOTE(review): y_test_r is assumed to be the test-target counterpart of
# y_train_r, created by the train/test split earlier in the file - confirm.
print('-'*30)
print("선형 회귀 결과")
print('MAE : %.3f' % mean_absolute_error(y_test_r, y_pred_lr))
print('MSE : %.3f' % mean_squared_error(y_test_r, y_pred_lr))
print('MAPE : %.3f' % mean_absolute_percentage_error(y_test_r, y_pred_lr))
print('-'*30)
print('-'*30)
110+
# Scratch loop: print each element of the list on its own line.
# NOTE(review): the loop body previously read `printa)`, which is a syntax
# error; print(x) is assumed to be the intent (print(a) is the other
# possible reading) - confirm.
a = [1, 2, 4, 5, 6]
for x in a:
    print(x)
File renamed without changes.

PandasPractice/pd.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import pandas as pd
2+
'''
3+
# python dictionary obj to pd.Series obj
4+
dict_data = {'song_name': 'STAY', 'artist': 'Bustin Jieber', 'release_date': 20220808}
5+
6+
sr = pd.Series(dict_data)
7+
print(type(sr))
8+
print('\n')
9+
print(sr) ## dtype: object
10+
11+
dict_data2 = {'a': 1, 'b': 2, '3': 20220808}
12+
sr = pd.Series(dict_data2)
13+
print(type(sr))
14+
print('\n')
15+
print(sr) ## dtype: int64
16+
17+
18+
dict_data3 = {'a': 'apple', 'b': 'bile', 'c': 'cyclone'}
19+
sr = pd.Series(dict_data3)
20+
print(type(sr))
21+
print('\n')
22+
print(sr) ## dtype: object
23+
idx = sr.index
24+
val = sr.values
25+
print(idx)
26+
print(val)
27+
print(type(val)) ## numpy.ndarray
28+
'''
29+
30+
# Build a pd.Series from a Python tuple. The list passed as index= supplies
# one label per tuple element, in order.
tuple_data = ('음성군', '진천읍', True, 4412)
sr = pd.Series(tuple_data, index=['first_loc', 'second_loc', 'including', '_id'])
print(sr)
34+
35+
# slicing , choosing elements.
36+
37+
38+

0 commit comments

Comments
 (0)