def myXGBoost(X_train, X_test, y_train, y_test, model, param_grid1, kFold=3):
    """Tune ``model`` with a cross-validated grid search and score it.

    Parameters
    ----------
    X_train, y_train : training features and labels handed to the search.
    X_test, y_test : held-out features and labels used for the final score.
    model : estimator passed to ``GridSearchCV``.
    param_grid1 : parameter grid explored by the search.
    kFold : value forwarded as ``cv`` to ``GridSearchCV`` (default 3).

    Returns
    -------
    tuple
        ``(accuracy, best_params)``: the accuracy of the fitted search
        object's predictions on ``X_test`` and its ``best_params_``.
    """
    search = GridSearchCV(estimator=model, param_grid=param_grid1, cv=kFold)
    search.fit(X_train, y_train)

    # Score the held-out split with the fitted search object itself.
    test_accuracy = accuracy_score(y_test, search.predict(X_test))
    return test_accuracy, search.best_params_
def param2(X_train, X_test, y_train, y_test, model, param_grid):
    """Run a two-stage grid search and report the second-stage winners.

    Stage one searches a fixed grid of tree-shape and sampling parameters
    through ``myXGBoost``.  Stage two pins every stage-one winner to a
    single candidate value and searches ``param_grid`` on top of them.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels used for scoring.
    model : estimator forwarded unchanged to ``myXGBoost``.
    param_grid : mapping of parameter name to candidate values for stage two.

    Returns
    -------
    tuple
        ``(accuracy, best_params)`` where ``accuracy`` is a builtin ``float``
        from the stage-two search and ``best_params`` holds the chosen value
        for each key of ``param_grid`` only.
    """
    stage_one_grid = {
        "max_depth": [2, 3, 4, 5, 6, 7, 9, 11],
        "min_child_weight": [4, 6, 7, 8],
        "subsample": [0.6, .7, .8, .9, 1],
        "colsample_bytree": [0.6, .7, .8, .9, 1],
    }
    # Only the winning parameters matter here; the stage-one score is unused.
    _, stage_one_best = myXGBoost(
        X_train, X_test, y_train, y_test, model, stage_one_grid, 3
    )

    # A grid needs a list of candidates per key, so wrap each stage-one
    # winner in a one-element list to hold it fixed during stage two.
    stage_two_grid = {name: [value] for name, value in stage_one_best.items()}
    requested = dict(param_grid)
    stage_two_grid.update(requested)

    accuracy, stage_two_best = myXGBoost(
        X_train, X_test, y_train, y_test, model, stage_two_grid, 3
    )

    # Report exactly the parameters the caller asked to tune, even when a
    # name also appears in the stage-one grid.
    tuned = {name: stage_two_best[name] for name in requested}

    # float() accepts both NumPy scalars and builtin floats, whereas
    # ``.item()`` exists only on the former.
    return float(accuracy), tuned
def xgboost(X_train, X_test, y_train, y_test, **kwargs):
    """Fit an ``XGBClassifier`` and return its accuracy on the test split.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels used for scoring.
    **kwargs : keyword arguments forwarded to ``XGBClassifier``.  ``seed``
        defaults to 9 when the caller does not supply it.

    Returns
    -------
    float
        Accuracy of the fitted model's predictions on ``X_test``, as a
        builtin ``float``.
    """
    # Treat the seed as a default rather than a fixed argument: passing
    # ``seed=9`` alongside ``**kwargs`` raises TypeError if the caller also
    # provides ``seed``.
    kwargs.setdefault("seed", 9)

    model = XGBClassifier(**kwargs)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # float() accepts both NumPy scalars and builtin floats, whereas
    # ``.item()`` exists only on the former.
    return float(accuracy_score(y_test, predictions))