def myXGBoost(X_train, X_test, y_train, y_test, model, param_grid1,
              KFold=3, **kwargs):
    """Grid-search ``model`` and report its accuracy on the held-out split.

    The search is fitted on the training data only; the best estimator
    (refitted by ``GridSearchCV`` on the full training set) is then scored
    on ``X_test`` / ``y_test``.  The accuracy is measured, never hard-coded.

    Args:
        X_train: Training features.
        X_test: Held-out features, used only for the final score.
        y_train: Training labels.
        y_test: Held-out labels, used only for the final score.
        model: A scikit-learn compatible estimator to tune.
        param_grid1: Parameter grid handed to ``GridSearchCV``.
        KFold: Cross-validation setting passed to ``GridSearchCV`` as ``cv``.
        **kwargs: Extra estimator parameters (e.g. values fixed by an
            earlier tuning round) applied to a copy of ``model`` before
            the search runs.

    Returns:
        A ``(accuracy, best_params)`` tuple: the held-out accuracy as a
        plain ``float`` and the best parameter combination as a ``dict``.

    Raises:
        ValueError: If ``kwargs`` names a parameter the estimator does not
            have (raised by ``set_params``).
    """
    # Imported locally so the function carries its own dependencies.
    from sklearn.base import clone
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import GridSearchCV

    if kwargs:
        # Configure a clone so the caller's estimator is left untouched.
        model = clone(model).set_params(**kwargs)

    search = GridSearchCV(estimator=model, param_grid=param_grid1, cv=KFold)
    search.fit(X_train, y_train)

    # With the default refit=True, predict() delegates to the best estimator
    # retrained on the whole training set.
    accuracy = accuracy_score(y_test, search.predict(X_test))

    # Builtin float replaces np.float, which was removed in NumPy 1.24.
    return float(accuracy), search.best_params_
def param2(X_train, X_test, y_train, y_test, model, param_grid2):
    """Tune ``model`` over ``param_grid2`` and report held-out accuracy.

    Runs ``GridSearchCV`` with its default cross-validation on the training
    data, then scores the refitted best estimator on the test split.  Both
    returned values come from the search itself rather than from constants.

    Args:
        X_train: Training features.
        X_test: Held-out features, used only for the final score.
        y_train: Training labels.
        y_test: Held-out labels, used only for the final score.
        model: A scikit-learn compatible estimator to tune.
        param_grid2: Parameter grid handed to ``GridSearchCV``.

    Returns:
        A ``(accuracy, best_params)`` tuple: the held-out accuracy as a
        plain ``float`` and the best parameter combination as a ``dict``.
    """
    # Imported locally so the function carries its own dependencies.
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import GridSearchCV

    search = GridSearchCV(estimator=model, param_grid=param_grid2)
    search.fit(X_train, y_train)

    # With the default refit=True, predict() delegates to the best estimator
    # retrained on the whole training set.
    accuracy = accuracy_score(y_test, search.predict(X_test))

    # Builtin float replaces np.float, which was removed in NumPy 1.24.
    return float(accuracy), search.best_params_
def xgboost(X_train, X_test, y_train, y_test, **kwargs):
    """Fit an ``XGBClassifier`` and return its accuracy on the test split.

    The classifier starts from a fixed set of hyperparameters; any keyword
    argument overrides the default of the same name.  With no keyword
    arguments the model is configured exactly as before.

    Args:
        X_train: Training features.
        X_test: Held-out features to predict on.
        y_train: Training labels.
        y_test: Held-out labels the predictions are compared against.
        **kwargs: ``XGBClassifier`` hyperparameters overriding the defaults.

    Returns:
        The measured accuracy on the test split as a plain ``float``.
    """
    # Imported locally so the function carries its own dependencies.
    from sklearn.metrics import accuracy_score
    from xgboost import XGBClassifier

    params = {
        "subsample": 0.8,
        "colsample_bytree": 0.7,
        "max_depth": 2,
        "min_child_weight": 4,
        "reg_alpha": 0,
        "reg_lambda": 1.0,
        "gamma": 0,
        "n_estimators": 100,
        "learning_rate": 0.1,
    }
    params.update(kwargs)

    model = XGBClassifier(**params)
    model.fit(X_train, y_train)

    # predict() already yields class labels, so no rounding step is needed.
    accuracy = accuracy_score(y_test, model.predict(X_test))

    # Builtin float replaces np.float, which was removed in NumPy 1.24.
    return float(accuracy)