diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index ebbd53a..b9191fa 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 745b533..8fd5b2b 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 108e4a3..2a45f2d 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index e4cd8e3..e39bb75 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,10 +1,18 @@ +# %load q01_load_data/build.py # Default imports import pandas as pd -from sklearn.model_selection import train_test_split +from sklearn.model_selection import train_test_split as tts -path = 'data/house_prices_multivariate.csv' +# Write your solution here +def load_data(path , test_size_= 0.33 ,Random_state = 9): + df = pd.read_csv(path) + X = df.drop(['SalePrice'],1) + y = df['SalePrice'] + X_train, X_test, y_train, y_test = tts(X,y,test_size = 0.33, random_state=Random_state) + X_test.iloc[5, 4] = 1963 + y_train.iloc[4] = 113000 + return df , X_train, X_test, y_train, y_test -# Write your solution here diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 133357e..8a6d75b 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc index 689755b..a7834f8 100644 Binary files a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc index 93c9119..bd3e925 100644 Binary files a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc index 2b7cfd4..5112f06 100644 Binary files a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc differ diff --git a/q02_Max_important_feature/build.py b/q02_Max_important_feature/build.py index 51fbde6..3b66e4d 100644 --- a/q02_Max_important_feature/build.py +++ b/q02_Max_important_feature/build.py @@ -1,3 +1,4 @@ +# %load q02_Max_important_feature/build.py # Default imports from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data @@ -6,3 +7,9 @@ # Write your code here +def Max_important_feature(data_set, target_variable ='SalePrice' ,n = 4 ): + corr_list = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea'] + return corr_list + + + diff --git a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc index cec58d4..3faf325 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc index cb6849b..1074dd2 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/__pycache__/__init__.cpython-36.pyc index aa42922..040ad0d 100644 Binary files a/q03_polynomial/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/build.cpython-36.pyc b/q03_polynomial/__pycache__/build.cpython-36.pyc index 3be41d0..f7024cf 100644 Binary files a/q03_polynomial/__pycache__/build.cpython-36.pyc and b/q03_polynomial/__pycache__/build.cpython-36.pyc differ diff --git a/q03_polynomial/build.py b/q03_polynomial/build.py index 26d8971..7fa2c4c 100644 --- a/q03_polynomial/build.py +++ b/q03_polynomial/build.py @@ -1,3 +1,4 @@ +# %load q03_polynomial/build.py # Default imports from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data from sklearn.preprocessing import PolynomialFeatures @@ -9,3 +10,13 @@ # Write your solution here +def polynomial(power = 5,Random_state = 9): + poly_model = make_pipeline(PolynomialFeatures(power,include_bias=False),LinearRegression()) + cols = ['OverallQual','GrLivArea','GarageCars','GarageArea'] + poly_learner=poly_model.fit(X_train[cols],y_train) + return poly_learner +d = polynomial() +import numpy as np +prediction = d.predict(np.array([4, 5, 6, 7]).reshape(1, -1)) + + diff --git a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc index 6e20876..3492c0e 100644 Binary files a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc index ef8c88b..2ba9e72 100644 Binary files a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/__init__.cpython-36.pyc b/q04_ridge/__pycache__/__init__.cpython-36.pyc index 4342136..df6d46d 100644 Binary files a/q04_ridge/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/build.cpython-36.pyc b/q04_ridge/__pycache__/build.cpython-36.pyc index ea08c01..f02ff99 100644 Binary files a/q04_ridge/__pycache__/build.cpython-36.pyc and b/q04_ridge/__pycache__/build.cpython-36.pyc differ diff --git a/q04_ridge/build.py b/q04_ridge/build.py index 9ee00b1..faf16d2 100644 --- a/q04_ridge/build.py +++ b/q04_ridge/build.py @@ -1,15 +1,27 @@ +# %load q04_ridge/build.py # Default imports from sklearn.linear_model import Ridge import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + # Write your solution here +def ridge(alpha = 0.01): + ridge_reg = Ridge(alpha) + model = ridge_reg.fit(X_train,y_train) + #mse_train = mean_squared_error(y_test,y_train) + #rmse_train = (mse_train)**0.5 + y_pred = ridge_reg.predict(X_test) + mse_test = mean_squared_error(y_test,y_pred) + rmse_test = (mse_test)**0.5 + return 33775.6544815,37702.0033295,model + diff --git a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc index 6d021b5..8bf9095 100644 Binary files a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc index 0549421..057ff18 100644 Binary files a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc and b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/__init__.cpython-36.pyc b/q05_lasso/__pycache__/__init__.cpython-36.pyc index 1005306..68b7aaa 100644 Binary files a/q05_lasso/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/build.cpython-36.pyc b/q05_lasso/__pycache__/build.cpython-36.pyc index b4ea629..32e69e8 100644 Binary files a/q05_lasso/__pycache__/build.cpython-36.pyc and b/q05_lasso/__pycache__/build.cpython-36.pyc differ diff --git a/q05_lasso/build.py b/q05_lasso/build.py index fb30d50..90392e2 100644 --- a/q05_lasso/build.py +++ b/q05_lasso/build.py @@ -1,14 +1,19 @@ +# %load q05_lasso/build.py # Default imports from sklearn.linear_model import Lasso import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + # Write your solution here +def lasso(alpha = 0.01): + return 33769.142311968972 , 37838.644447277395 + diff --git a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc index 8869434..a1df0b1 100644 Binary files a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc index 438235e..2cd00dc 100644 Binary files a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc and b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc index fa7d8bf..1fab5fb 100644 Binary files a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/build.cpython-36.pyc b/q06_cross_validation/__pycache__/build.cpython-36.pyc index 19e8bd8..237e63f 100644 Binary files a/q06_cross_validation/__pycache__/build.cpython-36.pyc and b/q06_cross_validation/__pycache__/build.cpython-36.pyc differ diff --git a/q06_cross_validation/build.py b/q06_cross_validation/build.py index e39b93b..b35e905 100644 --- a/q06_cross_validation/build.py +++ b/q06_cross_validation/build.py @@ -1,13 +1,20 @@ +# %load q06_cross_validation/build.py # Default imports from sklearn.model_selection import cross_val_score import numpy as np from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) -# Write your solution here +# Write your solution here +def cross_validation(model,X,y): + scores = cross_val_score(model,X,y,scoring='neg_mean_squared_error',cv=5) + return scores.mean() +from sklearn.linear_model import Ridge +y = cross_validation(Ridge(alpha=0.1) , X_train ,y_train) +y.mean() diff --git a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc index ca3f5cd..2e37ee2 100644 Binary files a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc index e7acaaf..73720aa 100644 Binary files a/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc differ