1 change: 1 addition & 0 deletions .gitignore
@@ -133,6 +133,7 @@ results/
ckp/
checkpoints/
*.swp
+wandb/

Dockerfile
build_dgx.sh
26 changes: 25 additions & 1 deletion micromind/core.py
@@ -25,11 +25,14 @@

# This is used ONLY if you are not using argparse to get the hparams
default_cfg = {
+    "project_name": "micromind",
    "output_folder": "results",
    "experiment_name": "micromind_exp",
    "opt": "adam",  # this is ignored if you are overriding the configure_optimizers
    "lr": 0.001,  # this is ignored if you are overriding the configure_optimizers
    "debug": False,
+    "log_wandb": False,
+    "wandb_resume": "auto",  # ["allow", "must", "never", "auto" or None]
Collaborator:
I'm not sure what the best way to do this is, but we need to better define the effect of each field of default_cfg.
Maybe a long comment ("""xx""") after the closing bracket? (A sketch follows the closing bracket below.)

}
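One possible shape for that suggestion, shown as a minimal sketch: a bare string literal placed right after the closing bracket. The per-field descriptions are assumptions inferred from the field names and their usage in this diff, not confirmed documentation.

```python
# Sketch only: the descriptions below are inferred, not authoritative.
"""
default_cfg fields:
    project_name    -- wandb project the run is logged under
    output_folder   -- directory where results and checkpoints are written
    experiment_name -- run name; also reused as the wandb run id
    opt             -- optimizer name; ignored if configure_optimizers is overridden
    lr              -- learning rate; ignored if configure_optimizers is overridden
    debug           -- if True, training is cut short for quick sanity checks
    log_wandb       -- if True, wandb logging is enabled
    wandb_resume    -- wandb resume policy: "allow", "must", "never", "auto", or None
"""
```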


@@ -381,14 +384,26 @@ def compute_macs(self, input_shape: Union[List, Tuple]):

    def on_train_start(self):
        """Initializes the optimizer, modules and puts the networks on the right
-        devices. Optionally loads checkpoint if already present.
+        devices. Optionally loads checkpoint if already present. It also starts the
+        wandb logger if selected.

        This function gets executed at the beginning of every training.
        """

        # pass debug status to checkpointer
        self.checkpointer.debug = self.hparams.debug

+        if self.hparams.log_wandb:
+            import wandb
+
+            self.wlog = wandb.init(
+                project=self.hparams.project_name,
+                name=self.hparams.experiment_name,
+                resume=self.hparams.wandb_resume,
+                id=self.hparams.experiment_name,
+                config=self.hparams,
Collaborator:
Here we could check whether the configuration provides any extra arguments to be passed to the init and, if so, load them (check the usage of the ** operator). (A sketch follows after this hunk.)
+            )

        init_opt = self.configure_optimizers()
        if isinstance(init_opt, list) or isinstance(init_opt, tuple):
            self.opt, self.lr_sched = init_opt
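Following up on the ** suggestion above, a minimal sketch of how extra init arguments could be forwarded. The wandb_extra_kwargs field is hypothetical and not part of the current default_cfg.

```python
# Sketch only: "wandb_extra_kwargs" is a hypothetical config field.
extra_kwargs = getattr(self.hparams, "wandb_extra_kwargs", None) or {}

self.wlog = wandb.init(
    project=self.hparams.project_name,
    name=self.hparams.experiment_name,
    resume=self.hparams.wandb_resume,
    id=self.hparams.experiment_name,
    config=self.hparams,
    **extra_kwargs,  # forward any user-supplied wandb.init arguments
)
```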
@@ -449,6 +464,8 @@ def init_devices(self):

    def on_train_end(self):
        """Runs at the end of each training. Cleans up before exiting."""
+        if self.hparams.log_wandb:
+            self.wlog.finish()
        pass

    def eval(self):
@@ -531,6 +548,9 @@ def train(
            # ok for cos_lr
            self.lr_sched.step()

+            if self.hparams.log_wandb:
+                self.wlog.log({"lr": self.lr_sched.get_last_lr()})
+
            for m in self.metrics:
                if (
                    self.current_epoch + 1
@@ -574,6 +594,10 @@ def train(
            else:
                val_metrics = train_metrics.update({"val_loss": loss_epoch / (idx + 1)})

+            if self.hparams.log_wandb:  # wandb log
Collaborator:
I might be wrong, but it would be nicer if we split the train and validation panes inside the wandb logging, so that something like val/{metric_name} is logged instead of val_{metric_name}. Of course, this change only applies to the .log call and should not be propagated to the entire core. (A sketch follows after this hunk.)

+                self.wlog.log(train_metrics)
+                self.wlog.log(val_metrics)
+
            if e >= 1 and self.debug:
                break

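On the pane-split suggestion above, a minimal sketch of rewriting keys only at the .log call, assuming the metric dicts follow the <split>_<name> key convention seen with val_loss:

```python
# Sketch of the suggested pane split, applied only at the .log call:
# "val_loss" becomes "val/loss" so wandb groups metrics into panes.
def split_panes(metrics: dict) -> dict:
    renamed = {}
    for key, value in metrics.items():
        for prefix in ("train", "val"):
            if key.startswith(prefix + "_"):
                key = prefix + "/" + key[len(prefix) + 1 :]
                break
        renamed[key] = value
    return renamed

self.wlog.log(split_panes(train_metrics))
self.wlog.log(split_panes(val_metrics))
```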