Add wandb logging to the tune_ts_* scripts (baseline, lstm, mlp, transformer)

Pavel Popov · Pavel Popov · commit 36e2ec02e76c · 2022-04-13T21:56:10.000-04:00
diff --git a/src/scripts/tune_ts_baseline.py b/src/scripts/tune_ts_baseline.py
@@ -12,13 +12,33 @@
 from src.settings import LOGS_ROOT, UTCNOW
 from src.ts import load_ABIDE1, TSQuantileTransformer
 
+import wandb
+import time
+from collections import defaultdict
+
 
 class Experiment(IExperiment):
     def __init__(self, quantile: bool) -> None:
         super().__init__()
         self._quantile: bool = quantile
         self._trial: optuna.Trial = None
 
+        # init wandb logger
+        self.wandbLogger: wandb.run = wandb.init(project="tune_ts", name="baseline")
+        # time.process_time() reading taken at the start of each trial (set in _objective)
+        self.start: float = 0.0
+        # classifier names offered to optuna as choices; also used as keys of wandb_tables
+        self.classifiers = [
+            "LogisticRegression",
+            "SGDClassifier",
+            "AdaBoostClassifier",
+            "RandomForestClassifier",
+        ]
+        # one wandb.Table per classifier; _objective adds a (score, time) row per trial
+        self.wandb_tables: dict = {}
+        for classifier in self.classifiers:
+            self.wandb_tables[classifier] = wandb.Table(columns=["score", "time"])
+
     def on_tune_start(self):
         features, labels = load_ABIDE1()
         X_train, X_test, y_train, y_test = train_test_split(
@@ -43,20 +63,14 @@ def on_experiment_start(self, exp: "IExperiment"):
         # setup model
         clf_type = self._trial.suggest_categorical(
             "classifier",
-            choices=[
-                "LogisticRegression",
-                "SGDClassifier",
-                "AdaBoostClassifier",
-                "RandomForestClassifier",
-            ],
+            choices=self.classifiers,
         )
+
         if clf_type == "LogisticRegression":
             solver = self._trial.suggest_categorical(
                 "classifier.logistic.solver", ["liblinear", "lbfgs"]
             )
-            decay = self._trial.suggest_loguniform(
-                "classifier.logistic.C", low=1e-3, high=1e3
-            )
+            decay = self._trial.suggest_loguniform("classifier.logistic.C", low=1e-3, high=1e3)
             if solver == "liblinear":
                 penalty = self._trial.suggest_categorical(
                     "classifier.logistic.penalty", ["l1", "l2"]
@@ -71,9 +85,7 @@ def on_experiment_start(self, exp: "IExperiment"):
             penalty = self._trial.suggest_categorical(
                 "classifier.sgd.penalty", ["l1", "l2", "elasticnet"]
             )
-            alpha = self._trial.suggest_loguniform(
-                "classifier.sgd.alpha", low=1e-4, high=1e-2
-            )
+            alpha = self._trial.suggest_loguniform("classifier.sgd.alpha", low=1e-4, high=1e-2)
             self.classifier = SGDClassifier(
                 loss="modified_huber",
                 penalty=penalty,
@@ -103,9 +115,7 @@ def run_dataset(self) -> None:
         self.classifier.fit(X_train, y_train)
         y_pred = self.classifier.predict(X_test)
         y_score = self.classifier.predict_proba(X_test)
-        report = get_classification_report(
-            y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5
-        )
+        report = get_classification_report(y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5)
         for stats_type in [0, 1, "macro", "weighted"]:
             stats = report.loc[stats_type]
             for key, value in stats.items():
@@ -119,14 +129,43 @@ def on_experiment_end(self, exp: "IExperiment") -> None:
         self._score = self.experiment_metrics[1]["ABIDE1"]["score"]
 
     def _objective(self, trial) -> float:
+        # record process CPU time at trial start (process_time excludes time spent sleeping)
+        self.start = time.process_time()
+
         self._trial = trial
         self.run()
+
+        # log overall score
+        self.wandbLogger.log({"overall score": self._score})
+
+        self.wandb_tables[type(self.classifier).__name__].add_data(
+            self._score, time.process_time() - self.start
+        )
+
         return self._score
 
     def tune(self, n_trials: int):
         self.on_tune_start()
         self.study = optuna.create_study(direction="maximize")
         self.study.optimize(self._objective, n_trials=n_trials, n_jobs=1)
+
+        # for each classifier, log a wandb line-series chart of its per-trial score and time
+        for classifier in self.classifiers:
+            tableLength = len(self.wandb_tables[classifier].get_column("score"))
+            self.wandb_tables[classifier].add_column(name="step", data=list(range(tableLength)))
+
+            line_series = wandb.plot.line_series(
+                xs=self.wandb_tables[classifier].get_column("step"),
+                ys=[
+                    self.wandb_tables[classifier].get_column("score"),
+                    self.wandb_tables[classifier].get_column("time"),
+                ],
+                keys=["score", "time"],
+                title=classifier,
+                xname="step",
+            )
+            wandb.log({classifier: line_series})
+
         logfile = f"{LOGS_ROOT}/{UTCNOW}-ts-baseline-q{self._quantile}.optuna.csv"
         df = self.study.trials_dataframe()
         df.to_csv(logfile, index=False)
diff --git a/src/scripts/tune_ts_lstm.py b/src/scripts/tune_ts_lstm.py
@@ -16,6 +16,8 @@
 from src.settings import LOGS_ROOT, UTCNOW
 from src.ts import load_ABIDE1, TSQuantileTransformer
 
+import wandb
+
 
 class LSTM(nn.Module):
     def __init__(
@@ -28,9 +30,7 @@ def __init__(
         super(LSTM, self).__init__()
         self.hidden_size = hidden_size
         self.bidirectional = bidirectional
-        self.lstm = nn.LSTM(
-            hidden_size=hidden_size, bidirectional=bidirectional, **kwargs
-        )
+        self.lstm = nn.LSTM(hidden_size=hidden_size, bidirectional=bidirectional, **kwargs)
         self.fc = nn.Sequential(
             nn.Dropout(p=fc_dropout),
             nn.Linear(2 * hidden_size if bidirectional else hidden_size, 1),
@@ -60,6 +60,9 @@ def __init__(self, quantile: bool, max_epochs: int, logdir: str) -> None:
         self.max_epochs = max_epochs
         self.logdir = logdir
 
+        # init wandb logger
+        self.wandbLogger: wandb.run = wandb.init(project="tune_ts", name="lstm")
+
     def on_tune_start(self):
         features, labels = load_ABIDE1()
         X_train, X_test, y_train, y_test = train_test_split(
@@ -105,9 +108,7 @@ def on_experiment_start(self, exp: "IExperiment"):
             hidden_size=self._trial.suggest_int("lstm.hidden_size", 32, 256, log=True),
             num_layers=self._trial.suggest_int("lstm.num_layers", 1, 4),
             batch_first=True,
-            bidirectional=self._trial.suggest_categorical(
-                "lstm.bidirectional", [True, False]
-            ),
+            bidirectional=self._trial.suggest_categorical("lstm.bidirectional", [True, False]),
             fc_dropout=self._trial.suggest_uniform("lstm.fc_dropout", 0.1, 0.9),
         )
         self.criterion = nn.BCEWithLogitsLoss()
@@ -160,9 +161,7 @@ def run_dataset(self) -> None:
         y_test = np.hstack(all_targets)
         y_score = np.hstack(all_scores)
         y_pred = (y_score > 0.5).astype(np.int32)
-        report = get_classification_report(
-            y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5
-        )
+        report = get_classification_report(y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5)
         for stats_type in [0, 1, "macro", "weighted"]:
             stats = report.loc[stats_type]
             for key, value in stats.items():
@@ -182,6 +181,10 @@ def on_experiment_end(self, exp: "IExperiment") -> None:
     def _objective(self, trial) -> float:
         self._trial = trial
         self.run()
+
+        # log score
+        self.wandbLogger.log({"score": self._score})
+
         return self._score
 
     def tune(self, n_trials: int):
diff --git a/src/scripts/tune_ts_mlp.py b/src/scripts/tune_ts_mlp.py
@@ -16,6 +16,8 @@
 from src.settings import LOGS_ROOT, UTCNOW
 from src.ts import load_ABIDE1, TSQuantileTransformer
 
+import wandb
+
 
 class ResidualBlock(nn.Module):
     def __init__(self, block):
@@ -77,6 +79,9 @@ def __init__(self, quantile: bool, max_epochs: int, logdir: str) -> None:
         self.max_epochs = max_epochs
         self.logdir = logdir
 
+        # init wandb logger
+        self.wandbLogger: wandb.run = wandb.init(project="tune_ts", name="mlp")
+
     def on_tune_start(self):
         features, labels = load_ABIDE1()
         X_train, X_test, y_train, y_test = train_test_split(
@@ -173,9 +178,7 @@ def run_dataset(self) -> None:
         y_test = np.hstack(all_targets)
         y_score = np.hstack(all_scores)
         y_pred = (y_score > 0.5).astype(np.int32)
-        report = get_classification_report(
-            y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5
-        )
+        report = get_classification_report(y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5)
         for stats_type in [0, 1, "macro", "weighted"]:
             stats = report.loc[stats_type]
             for key, value in stats.items():
@@ -195,6 +198,10 @@ def on_experiment_end(self, exp: "IExperiment") -> None:
     def _objective(self, trial) -> float:
         self._trial = trial
         self.run()
+
+        # log score
+        self.wandbLogger.log({"score": self._score})
+
         return self._score
 
     def tune(self, n_trials: int):
diff --git a/src/scripts/tune_ts_transformer.py b/src/scripts/tune_ts_transformer.py
@@ -16,6 +16,8 @@
 from src.settings import LOGS_ROOT, UTCNOW
 from src.ts import load_ABIDE1, TSQuantileTransformer
 
+import wandb
+
 
 class Transformer(nn.Module):
     def __init__(
@@ -56,6 +58,9 @@ def __init__(self, quantile: bool, max_epochs: int, logdir: str) -> None:
         self.max_epochs = max_epochs
         self.logdir = logdir
 
+        # init wandb logger
+        self.wandbLogger: wandb.run = wandb.init(project="tune_ts", name="transformer")
+
     def on_tune_start(self):
         features, labels = load_ABIDE1()
         X_train, X_test, y_train, y_test = train_test_split(
@@ -96,9 +101,7 @@ def on_experiment_start(self, exp: "IExperiment"):
             ),
         }
         # setup model
-        hidden_size = self._trial.suggest_int(
-            "transformer.hidden_size", 4, 128, log=True
-        )
+        hidden_size = self._trial.suggest_int("transformer.hidden_size", 4, 128, log=True)
         num_heads = self._trial.suggest_int("transformer.num_heads", 1, 4)
         self.model = Transformer(
             input_size=53,  # PRIOR
@@ -157,9 +160,7 @@ def run_dataset(self) -> None:
         y_test = np.hstack(all_targets)
         y_score = np.hstack(all_scores)
         y_pred = (y_score > 0.5).astype(np.int32)
-        report = get_classification_report(
-            y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5
-        )
+        report = get_classification_report(y_true=y_test, y_pred=y_pred, y_score=y_score, beta=0.5)
         for stats_type in [0, 1, "macro", "weighted"]:
             stats = report.loc[stats_type]
             for key, value in stats.items():
@@ -179,6 +180,10 @@ def on_experiment_end(self, exp: "IExperiment") -> None:
     def _objective(self, trial) -> float:
         self._trial = trial
         self.run()
+
+        # log score
+        self.wandbLogger.log({"score": self._score})
+
         return self._score
 
     def tune(self, n_trials: int):