1212from src .settings import LOGS_ROOT , UTCNOW
1313from src .ts import load_ABIDE1 , TSQuantileTransformer
1414
15+ import wandb
16+ import time
17+ from collections import defaultdict
18+
1519
1620class Experiment (IExperiment ):
def __init__(self, quantile: bool) -> None:
    """Set up experiment state and the Weights & Biases logging objects.

    Note that constructing an ``Experiment`` starts a W&B run as a side
    effect (``wandb.init`` is called here).

    Args:
        quantile: Flag stored on the instance as ``_quantile``; it is also
            embedded in the name of the CSV log written by ``tune``.
    """
    super().__init__()
    self._quantile: bool = quantile
    # No trial exists until Optuna calls ``_objective``.
    self._trial: "optuna.Trial | None" = None

    # init wandb logger
    # ``wandb.init`` returns the run object; ``wandb.run`` is a module-level
    # attribute, not a type, so it must not be used as the annotation.
    self.wandbLogger: "wandb.sdk.wandb_run.Run" = wandb.init(
        project="tune_ts", name="baseline"
    )
    # for timer: value of time.process_time() at the start of the current trial
    self.start: float = 0.0
    # set classifiers: the categorical choices offered to Optuna; the same
    # strings are used as keys of ``wandb_tables``
    self.classifiers = [
        "LogisticRegression",
        "SGDClassifier",
        "AdaBoostClassifier",
        "RandomForestClassifier",
    ]
    # initialize tables for different classifiers: one (score, time) row is
    # appended per trial that used the classifier
    self.wandb_tables: dict = {
        classifier: wandb.Table(columns=["score", "time"])
        for classifier in self.classifiers
    }
41+
2242 def on_tune_start (self ):
2343 features , labels = load_ABIDE1 ()
2444 X_train , X_test , y_train , y_test = train_test_split (
@@ -43,20 +63,14 @@ def on_experiment_start(self, exp: "IExperiment"):
4363 # setup model
4464 clf_type = self ._trial .suggest_categorical (
4565 "classifier" ,
46- choices = [
47- "LogisticRegression" ,
48- "SGDClassifier" ,
49- "AdaBoostClassifier" ,
50- "RandomForestClassifier" ,
51- ],
66+ choices = self .classifiers ,
5267 )
68+
5369 if clf_type == "LogisticRegression" :
5470 solver = self ._trial .suggest_categorical (
5571 "classifier.logistic.solver" , ["liblinear" , "lbfgs" ]
5672 )
57- decay = self ._trial .suggest_loguniform (
58- "classifier.logistic.C" , low = 1e-3 , high = 1e3
59- )
73+ decay = self ._trial .suggest_loguniform ("classifier.logistic.C" , low = 1e-3 , high = 1e3 )
6074 if solver == "liblinear" :
6175 penalty = self ._trial .suggest_categorical (
6276 "classifier.logistic.penalty" , ["l1" , "l2" ]
@@ -71,9 +85,7 @@ def on_experiment_start(self, exp: "IExperiment"):
7185 penalty = self ._trial .suggest_categorical (
7286 "classifier.sgd.penalty" , ["l1" , "l2" , "elasticnet" ]
7387 )
74- alpha = self ._trial .suggest_loguniform (
75- "classifier.sgd.alpha" , low = 1e-4 , high = 1e-2
76- )
88+ alpha = self ._trial .suggest_loguniform ("classifier.sgd.alpha" , low = 1e-4 , high = 1e-2 )
7789 self .classifier = SGDClassifier (
7890 loss = "modified_huber" ,
7991 penalty = penalty ,
@@ -103,9 +115,7 @@ def run_dataset(self) -> None:
103115 self .classifier .fit (X_train , y_train )
104116 y_pred = self .classifier .predict (X_test )
105117 y_score = self .classifier .predict_proba (X_test )
106- report = get_classification_report (
107- y_true = y_test , y_pred = y_pred , y_score = y_score , beta = 0.5
108- )
118+ report = get_classification_report (y_true = y_test , y_pred = y_pred , y_score = y_score , beta = 0.5 )
109119 for stats_type in [0 , 1 , "macro" , "weighted" ]:
110120 stats = report .loc [stats_type ]
111121 for key , value in stats .items ():
@@ -119,14 +129,43 @@ def on_experiment_end(self, exp: "IExperiment") -> None:
119129 self ._score = self .experiment_metrics [1 ]["ABIDE1" ]["score" ]
120130
def _objective(self, trial) -> float:
    """Optuna objective: run one trial and record its score and CPU time.

    Args:
        trial: The Optuna trial whose suggestions configure this run.

    Returns:
        The score stored in ``self._score`` after the run finished.
    """
    # Timer reference point; ``process_time`` counts CPU time of this process.
    self.start = time.process_time()

    self._trial = trial
    self.run()

    # Overall score across all trials, regardless of classifier.
    self.wandbLogger.log({"overall score": self._score})

    # Per-classifier bookkeeping: tables are keyed by the classifier's class
    # name. The elapsed time is taken after the W&B log call above, so it
    # includes the CPU time spent there as well.
    classifier_name = type(self.classifier).__name__
    table = self.wandb_tables[classifier_name]
    score = self._score
    elapsed = time.process_time() - self.start
    table.add_data(score, elapsed)

    return self._score
125146
def tune(self, n_trials: int):
    """Run the Optuna study, publish per-classifier charts and save a CSV.

    Calls ``on_tune_start``, maximizes ``_objective`` over ``n_trials``
    sequential trials, logs one W&B line-series chart (score and time per
    step) for every classifier that has at least one recorded trial, and
    writes the study's trials dataframe to a CSV file under ``LOGS_ROOT``.

    Args:
        n_trials: Number of Optuna trials to run.
    """
    self.on_tune_start()
    self.study = optuna.create_study(direction="maximize")
    self.study.optimize(self._objective, n_trials=n_trials, n_jobs=1)

    # log score and experiment time
    for classifier in self.classifiers:
        table = self.wandb_tables[classifier]
        scores = table.get_column("score")
        if len(scores) == 0:
            # This classifier was never sampled (likely with few trials).
            # There is nothing to plot, and building a line series from
            # empty data would fail before the CSV below is written.
            continue
        times = table.get_column("time")

        # The step is the per-classifier trial index, not the global trial
        # number of the study.
        steps = list(range(len(scores)))
        table.add_column(name="step", data=steps)

        line_series = wandb.plot.line_series(
            xs=steps,
            ys=[scores, times],
            keys=["score", "time"],
            title=classifier,
            xname="step",
        )
        # Log through the run created in __init__, as _objective does.
        self.wandbLogger.log({classifier: line_series})

    logfile = f"{LOGS_ROOT}/{UTCNOW}-ts-baseline-q{self._quantile}.optuna.csv"
    df = self.study.trials_dataframe()
    df.to_csv(logfile, index=False)
0 commit comments