diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
index 51086b4..cb7d45e 100644
Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_myXGBoost/__pycache__/__init__.cpython-36.pyc b/q01_myXGBoost/__pycache__/__init__.cpython-36.pyc
index 05966ae..2fb3142 100644
Binary files a/q01_myXGBoost/__pycache__/__init__.cpython-36.pyc and b/q01_myXGBoost/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_myXGBoost/__pycache__/build.cpython-36.pyc b/q01_myXGBoost/__pycache__/build.cpython-36.pyc
index 73181f1..2f777f3 100644
Binary files a/q01_myXGBoost/__pycache__/build.cpython-36.pyc and b/q01_myXGBoost/__pycache__/build.cpython-36.pyc differ
diff --git a/q01_myXGBoost/build.py b/q01_myXGBoost/build.py
index db3654a..35f1a7a 100644
--- a/q01_myXGBoost/build.py
+++ b/q01_myXGBoost/build.py
@@ -1,8 +1,10 @@
+# %load q01_myXGBoost/build.py
 import pandas as pd
 from xgboost import XGBClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.model_selection import GridSearchCV
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score,make_scorer
+from sklearn.model_selection import cross_val_score

 # load data
 dataset = pd.read_csv('data/loan_clean_data.csv')
@@ -11,13 +13,26 @@
 y = dataset.iloc[:, -1]

 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=9)

-param_grid1 = {"max_depth": [2, 3, 4, 5, 6, 7, 9, 11],
-               "min_child_weight": [4, 6, 7, 8],
-               "subsample": [0.6, .7, .8, .9, 1],
-               "colsample_bytree": [0.6, .7, .8, .9, 1]
+param_grid1 = {'max_depth': [2, 3, 4, 5, 6, 7, 9, 11],
+               'min_child_weight': [4, 6, 7, 8],
+               'subsample': [0.6, .7, .8, .9, 1],
+               'colsample_bytree': [0.6, .7, .8, .9, 1]
                }
-
+xgb = XGBClassifier(seed=9)
 # Write your solution here :
-
+def myXGBoost(X_train, X_test, y_train, y_test, model, param_grid, KFold=3, **kwargs):
+
+    for i, j in kwargs.items():
+        lst1 = list()
+        lst1.append(j)
+        param_grid[i] = lst1
+
+    acc_scorer = make_scorer(accuracy_score)
+    grid_obj = GridSearchCV(model, param_grid, scoring=acc_scorer, cv=KFold)
+    grid_obj = grid_obj.fit(X_train, y_train)
+    y_pred = grid_obj.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    best_params = grid_obj.best_params_
+    return accuracy, best_params
diff --git a/q01_myXGBoost/tests/__pycache__/__init__.cpython-36.pyc b/q01_myXGBoost/tests/__pycache__/__init__.cpython-36.pyc
index 8dfa197..7849044 100644
Binary files a/q01_myXGBoost/tests/__pycache__/__init__.cpython-36.pyc and b/q01_myXGBoost/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_myXGBoost/tests/__pycache__/test_q01_myXGBoost.cpython-36.pyc b/q01_myXGBoost/tests/__pycache__/test_q01_myXGBoost.cpython-36.pyc
index c955d76..738b102 100644
Binary files a/q01_myXGBoost/tests/__pycache__/test_q01_myXGBoost.cpython-36.pyc and b/q01_myXGBoost/tests/__pycache__/test_q01_myXGBoost.cpython-36.pyc differ
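For reference, a minimal usage sketch of the myXGBoost helper added above. It is only a sketch: the import path is the one q02_param2/build.py uses for this module, the other imported names (X_train, X_test, y_train, y_test, xgb, param_grid1) are module-level variables defined in the diff, and subsample=0.8 is an illustrative keyword override, not a value taken from the diff. Running it also needs data/loan_clean_data.csv relative to the working directory.

    # usage sketch (assumptions noted above)
    from greyatomlib.Xgboost_project.q01_myXGBoost.build import (
        myXGBoost, X_train, X_test, y_train, y_test, xgb, param_grid1)

    # subsample=0.8 is pinned via **kwargs (wrapped in a one-element list inside
    # myXGBoost); the rest of param_grid1 is searched by GridSearchCV with cv=3
    accuracy, best_params = myXGBoost(X_train, X_test, y_train, y_test,
                                      xgb, param_grid1, KFold=3, subsample=0.8)
    print(accuracy, best_params)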
diff --git a/q02_param2/__pycache__/__init__.cpython-36.pyc b/q02_param2/__pycache__/__init__.cpython-36.pyc
index 65aae62..83177e8 100644
Binary files a/q02_param2/__pycache__/__init__.cpython-36.pyc and b/q02_param2/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q02_param2/__pycache__/build.cpython-36.pyc b/q02_param2/__pycache__/build.cpython-36.pyc
index 265965e..de846c2 100644
Binary files a/q02_param2/__pycache__/build.cpython-36.pyc and b/q02_param2/__pycache__/build.cpython-36.pyc differ
diff --git a/q02_param2/build.py b/q02_param2/build.py
index 8391570..48a8a95 100644
--- a/q02_param2/build.py
+++ b/q02_param2/build.py
@@ -1,8 +1,11 @@
+# %load q02_param2/build.py
 # Default imports
 from sklearn.model_selection import train_test_split
 from xgboost import XGBClassifier
 import pandas as pd
 from greyatomlib.Xgboost_project.q01_myXGBoost.build import myXGBoost
+from sklearn.metrics import accuracy_score,make_scorer
+

 # load data
 dataset = pd.read_csv('data/loan_clean_data.csv')
@@ -11,10 +14,13 @@
 y = dataset.iloc[:, -1]

 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=9)
-param_grid2 = {"gamma": [0, 0.05, 0.1, 0.3, 0.7, 0.9, 1],
-               "reg_alpha": [0, 0.001, 0.005, 0.01, 0.05, 0.1],
-               "reg_lambda": [0.05, 0.1, 0.5, 1.0]
+param_grid2 = {'gamma': [0, 0.05, 0.1, 0.3, 0.7, 0.9, 1],
+               'reg_alpha': [0, 0.001, 0.005, 0.01, 0.05, 0.1],
+               'reg_lambda': [0.05, 0.1, 0.5, 1.0]
                }
-
-
+xgb = XGBClassifier(seed=9)
 # Write your solution here :
+def param2(X_train, X_test, y_train, y_test, model, param_grid2):
+    accuracy, best_params = myXGBoost(X_train, X_test, y_train, y_test, model, param_grid2, 3, subsample=0.8, colsample_bytree=0.7, max_depth=2, min_child_weight=4)
+    return accuracy, best_params
+
diff --git a/q02_param2/tests/__pycache__/__init__.cpython-36.pyc b/q02_param2/tests/__pycache__/__init__.cpython-36.pyc
index 19bc1aa..08f1981 100644
Binary files a/q02_param2/tests/__pycache__/__init__.cpython-36.pyc and b/q02_param2/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q02_param2/tests/__pycache__/test_q02_param2.cpython-36.pyc b/q02_param2/tests/__pycache__/test_q02_param2.cpython-36.pyc
index 18c07a7..cb30d1f 100644
Binary files a/q02_param2/tests/__pycache__/test_q02_param2.cpython-36.pyc and b/q02_param2/tests/__pycache__/test_q02_param2.cpython-36.pyc differ
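Below, a similarly hedged sketch of calling param2. The q02 import path is assumed by analogy with the q01 path above (only the q01 path appears verbatim in the diff), and the other imported names are module-level variables from q02_param2/build.py. param2 forwards everything to myXGBoost, pinning subsample, colsample_bytree, max_depth and min_child_weight while grid-searching gamma, reg_alpha and reg_lambda.

    # usage sketch (import path for q02 assumed by analogy with q01)
    from greyatomlib.Xgboost_project.q02_param2.build import (
        param2, X_train, X_test, y_train, y_test, xgb, param_grid2)

    # delegates to myXGBoost with cv=3 and four hyperparameters held fixed
    accuracy, best_params = param2(X_train, X_test, y_train, y_test, xgb, param_grid2)
    print(accuracy, best_params)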
diff --git a/q03_xgboost/__pycache__/__init__.cpython-36.pyc b/q03_xgboost/__pycache__/__init__.cpython-36.pyc
index 2e9c375..2a6686f 100644
Binary files a/q03_xgboost/__pycache__/__init__.cpython-36.pyc and b/q03_xgboost/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q03_xgboost/__pycache__/build.cpython-36.pyc b/q03_xgboost/__pycache__/build.cpython-36.pyc
index 4c997b3..f7fb123 100644
Binary files a/q03_xgboost/__pycache__/build.cpython-36.pyc and b/q03_xgboost/__pycache__/build.cpython-36.pyc differ
diff --git a/q03_xgboost/build.py b/q03_xgboost/build.py
index 7905a04..dcfdff0 100644
--- a/q03_xgboost/build.py
+++ b/q03_xgboost/build.py
@@ -1,8 +1,11 @@
+# %load q03_xgboost/build.py
 # Default imports
 from sklearn.model_selection import train_test_split
 from xgboost import XGBClassifier
 import pandas as pd
-from sklearn.metrics import accuracy_score
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import accuracy_score,make_scorer
+import numpy as np

 # load data
 dataset = pd.read_csv('data/loan_clean_data.csv')
@@ -13,5 +16,18 @@

 # Write your solution here :
-
+def xgboost(X_train, X_test, y_train, y_test, **kwargs):
+    dict1 = dict()
+    for i, j in kwargs.items():
+        lst1 = list()
+        lst1.append(j)
+        dict1[i] = lst1
+
+    xgb = XGBClassifier(seed=9)
+    acc_scorer = make_scorer(accuracy_score)
+    grid_obj = GridSearchCV(xgb, dict1, scoring=acc_scorer)
+    grid_obj = grid_obj.fit(X_train, y_train)
+    y_pred = grid_obj.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    return accuracy
diff --git a/q03_xgboost/tests/__pycache__/__init__.cpython-36.pyc b/q03_xgboost/tests/__pycache__/__init__.cpython-36.pyc
index e887bf7..786648a 100644
Binary files a/q03_xgboost/tests/__pycache__/__init__.cpython-36.pyc and b/q03_xgboost/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q03_xgboost/tests/__pycache__/test_q03_xgboost.cpython-36.pyc b/q03_xgboost/tests/__pycache__/test_q03_xgboost.cpython-36.pyc
index 77271df..6b62605 100644
Binary files a/q03_xgboost/tests/__pycache__/test_q03_xgboost.cpython-36.pyc and b/q03_xgboost/tests/__pycache__/test_q03_xgboost.cpython-36.pyc differ
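Finally, a sketch of calling the xgboost function from q03. The diff does not show how q03 builds its train/test split, so the sketch recreates one locally using the same CSV and split settings seen in q01 and q02, assuming the features are every column but the last; the q03 import path is assumed by analogy with q01, and the fixed hyperparameter values passed here are illustrative only.

    # usage sketch (split recreated locally; q03 import path assumed)
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from greyatomlib.Xgboost_project.q03_xgboost.build import xgboost

    dataset = pd.read_csv('data/loan_clean_data.csv')
    X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=9)

    # each keyword is wrapped in a one-element list inside xgboost, so GridSearchCV
    # fits a single XGBClassifier(seed=9) candidate with these fixed settings and
    # the function returns accuracy on X_test
    accuracy = xgboost(X_train, X_test, y_train, y_test,
                       max_depth=2, subsample=0.8, colsample_bytree=0.7, min_child_weight=4)
    print(accuracy)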