Merge pull request #65 from mibaumgartner/load_save

mibaumgartner · web-flow · commit fff41cf775a1 · 2019-02-22T15:02:11.000+01:00
Removed weights_only option
diff --git a/delira/io/torch.py b/delira/io/torch.py
@@ -11,16 +11,10 @@
 if "TORCH" in get_backends():
 
     import torch
-
-    from torchvision import models as t_models
-    from torch import nn
-    from torch.nn import functional as F
-    from torch import optim
-
     from ..models import AbstractPyTorchNetwork
 
     def save_checkpoint(file: str, model=None, optimizers={},
-                        epoch=None, weights_only=True, **kwargs):
+                        epoch=None, **kwargs):
         """
         Save model's parameters
 
@@ -35,9 +29,6 @@ def save_checkpoint(file: str, model=None, optimizers={},
             dictionary containing all optimizers
         epoch : int
             current epoch (will also be pickled)
-        weights_only : bool
-            whether or not to save only the model's weights or also save additional
-            information (for easy loading)
 
         """
         if isinstance(model, torch.nn.DataParallel):
@@ -66,39 +57,16 @@ def save_checkpoint(file: str, model=None, optimizers={},
                  "model": model_state,
                  "epoch": epoch}
 
-        if not weights_only:
-
-            source = inspect.getsource(_model.__class__)
-
-            class_name_model = _model.__class__.__name__
-            class_names_optim = OrderedDict()
-
-            for key in optim_state.keys():
-                class_names_optim[key] = optimizers[key].__class__.__name__
-
-            parent_class = _model.__class__.__mro__[1].__name__
+        torch.save(state, file, **kwargs)
 
-            init_kwargs = _model.init_kwargs
-
-            torch.save({'source': source, 'cls_name_model': class_name_model,
-                        'parent_class': parent_class, 'init_kwargs': init_kwargs,
-                        'state_dict': state, 'cls_name_optim': class_names_optim},
-                       file)
-
-        else:
-            torch.save(state, file)
-
-    def load_checkpoint(file, weights_only=True, **kwargs):
+    def load_checkpoint(file, **kwargs):
         """
         Loads a saved model
 
         Parameters
         ----------
         file : str
             filepath to a file containing a saved model
-        weights_only : bool
-            whether the file contains only weights / only weights should be
-            returned
         **kwargs:
             Additional keyword arguments (passed to torch.load)
             Especially "map_location" is important to change the device the
@@ -107,57 +75,12 @@ def load_checkpoint(file, weights_only=True, **kwargs):
         Returns
         -------
         OrderedDict
-            checkpoint state_dict if `weights_only=True`
-        torch.nn.Module, OrderedDict, int
-            Model, Optimizers, epoch with loaded state_dicts if `weights_only=False`
+            checkpoint state_dict
 
         """
-        if weights_only:
-            return torch.load(file, **kwargs)
-        else:
-            loaded_dict = torch.load(file, **kwargs)
-
-            # import parent class
-            exec("from ..models import " + loaded_dict["parent_class"])
-
-            # execute pickled code (to get access to class)
-            exec(loaded_dict["source"])
-
-            # create class instance (default device: CPU)
-            exec("model = " + loaded_dict["cls_name_model"] +
-                 "(**loaded_dict['init_kwargs'])")
-
-            # check for "map_location" kwarg and use device of first weight tensor
-            # as default argument (weight tensors should be all on same device)
-            if loaded_dict["state_dict"]["model"]:
-                default_device = next(
-                    islice(
-                        loaded_dict["state_dict"]["model"].values(), 1)
-                ).device
-            else:
-                default_device = torch.device("cpu")
-
-            map_location = kwargs.get("map_location",
-                                      # use slicing instead of converting to list
-                                      # to avoid memory overhead
-                                      default_device)
-
-            # push created class from CPU to suitable device
-            locals()['model'].to(map_location)
-
-            locals()['model'].load_state_dict(
-                loaded_dict["state_dict"]["model"])
-
-            optims = OrderedDict()
-
-            for key in loaded_dict["cls_name_optim"].keys():
-                exec("_optim = optim.%s(models.parameters())" %
-                     loaded_dict["cls_name_optim"][key])
-
-                optims[key] = locals()['_optim']
-
-            for key, val in optims.items():
-                optims[key] = val.load_state_dict(
-                    loaded_dict["state_dict"]["optimizer"][key])
+        checkpoint = torch.load(file, **kwargs)
 
-            return locals()['model'], optims, loaded_dict["state_dict"]["epoch"]
+        if not all([_key in checkpoint
+                    for _key in ["model", "optimizer", "epoch"]]):
+            return checkpoint['state_dict']
+        return checkpoint
diff --git a/delira/training/pytorch_trainer.py b/delira/training/pytorch_trainer.py
@@ -187,21 +187,12 @@ def _setup(self, network, optim_fn, optimizer_cls, optimizer_params,
 
                     logger.info("Attempting to load state from previous \
                                 training from %s" % latest_state_path)
-
                     try:
-                        self.update_state(latest_state_path,
-                                          weights_only=False)
+                        self.update_state(latest_state_path)
                     except KeyError:
-                        try:
-                            self.update_state(latest_state_path,
-                                              weights_only=True)
-                            self.start_epoch = max(
-                                latest_epoch, self.start_epoch)
-
-                        except KeyError:
-                            logger.warn("Previous State could not be loaded, \
-                                        although it exists.Training will be \
-                                        restarted")
+                        logger.warn("Previous State could not be loaded, \
+                                    although it exists.Training will be \
+                                    restarted")
 
             # assign closure and prepare batch from network
             self.closure_fn = network.closure
@@ -363,8 +354,7 @@ def _at_training_end(self):
 
                 # load best model and return it
                 self.update_state(os.path.join(self.save_path,
-                                               'checkpoint_best.pth'),
-                                  weights_only=True
+                                               'checkpoint_best.pth')
                                   )
 
             return self.module
@@ -394,8 +384,7 @@ def _at_epoch_begin(self, metrics_val, val_score_key, epoch, num_epochs,
             for cb in self._callbacks:
                 self._update_state(cb.at_epoch_begin(self, val_metrics=metrics_val,
                                                      val_score_key=val_score_key,
-                                                     curr_epoch=epoch),
-                                   weights_only=False)
+                                                     curr_epoch=epoch))
 
         def _at_epoch_end(self, metrics_val, val_score_key, epoch, is_best,
                           **kwargs):
@@ -423,18 +412,17 @@ def _at_epoch_end(self, metrics_val, val_score_key, epoch, is_best,
             for cb in self._callbacks:
                 self._update_state(cb.at_epoch_end(self, val_metrics=metrics_val,
                                                    val_score_key=val_score_key,
-                                                   curr_epoch=epoch),
-                                   weights_only=False)
+                                                   curr_epoch=epoch))
 
             if epoch % self.save_freq == 0:
                 self.save_state(os.path.join(self.save_path,
                                              "checkpoint_epoch_%d.pth" % epoch),
-                                epoch, False)
+                                epoch)
 
             if is_best:
                 self.save_state(os.path.join(self.save_path,
                                              "checkpoint_best.pth"),
-                                epoch, False)
+                                epoch)
 
         def _train_single_epoch(self, batchgen: MultiThreadedAugmenter, epoch):
             """
@@ -597,7 +585,7 @@ def predict(self, batchgen, batch_size=None):
 
             return outputs_all, labels_all, val_dict
 
-        def save_state(self, file_name, epoch, weights_only=False, **kwargs):
+        def save_state(self, file_name, epoch, **kwargs):
             """
             saves the current state via :func:`delira.io.torch.save_checkpoint`
 
@@ -607,28 +595,24 @@ def save_state(self, file_name, epoch, weights_only=False, **kwargs):
                 filename to save the state to
             epoch : int
                 current epoch (will be saved for mapping back)
-            weights_only : bool
-                whether to store only weights (default: False)
             *args :
                 positional arguments
             **kwargs :
                 keyword arguments
 
             """
-            save_checkpoint(file_name, self.module, self.optimizers, weights_only,
-                            **kwargs)
+            save_checkpoint(file_name, self.module, self.optimizers,
+                            epoch=epoch, **kwargs)
 
         @staticmethod
-        def load_state(file_name, weights_only=True, **kwargs):
+        def load_state(file_name, **kwargs):
             """
             Loads the new state from file via :func:`delira.io.torch.load_checkpoint`
 
             Parameters
             ----------
             file_name : str
                 the file to load the state from
-            weights_only : bool
-                whether file contains stored weights only (default: False)
             **kwargs : keyword arguments
 
             Returns
@@ -637,24 +621,16 @@ def load_state(file_name, weights_only=True, **kwargs):
                 new state
 
             """
-            if weights_only:
-                return load_checkpoint(file_name, weights_only, **kwargs)
-            else:
-                model, optimizer, epoch = load_checkpoint(file_name, weights_only,
-                                                          **kwargs)
-                return {"module": model, "optimizers": optimizer,
-                        "start_epoch": epoch}
+            return load_checkpoint(file_name, **kwargs)
 
-        def update_state(self, file_name, weights_only=True, *args, **kwargs):
+        def update_state(self, file_name, *args, **kwargs):
             """
             Update internal state from a loaded state
 
             Parameters
             ----------
             file_name : str
                 file containing the new state to load
-            weights_only : bool
-                whether to update only weights or notS
             *args :
                 positional arguments
             **kwargs :
@@ -666,46 +642,35 @@ def update_state(self, file_name, weights_only=True, *args, **kwargs):
                 the trainer with a modified state
 
             """
-            self._update_state(self.load_state(file_name, weights_only,
-                                               *args, **kwargs), weights_only)
+            self._update_state(self.load_state(file_name, *args, **kwargs))
 
-        def _update_state(self, new_state, weights_only=True):
+        def _update_state(self, new_state):
             """
             Update the state from a given new state
 
             Parameters
             ----------
             new_state : dict
                 new state to update internal state from
-            weights_only : bool
-                whether to update weights only from statedict or update 
-                everything
 
             Returns
             -------
             :class:`PyTorchNetworkTrainer`
                 the trainer with a modified state
 
-            # """
+            """
             # print(",".join(new_state.keys()))
 
-            if weights_only:
-                if "model" in new_state:
-                    model_state = new_state["model"]
-                else:
-                    model_state = new_state
-
-                self.module.load_state_dict(model_state)
+            if "model" in new_state:
+                self.module.load_state_dict(new_state.pop("model"))
 
-                if "optimizer" in new_state and new_state["optimizer"]:
-                    for key in self.optimizers.keys():
-                        self.optimizers[key].load_state_dict(
-                            new_state["optimizer"][key])
+            if "optimizer" in new_state and new_state["optimizer"]:
+                optim_state = new_state.pop("optimizer")
+                for key in self.optimizers.keys():
+                    self.optimizers[key].load_state_dict(
+                        optim_state[key])
 
-                if "epoch" in new_state:
-                    self.start_epoch = new_state["epoch"]
+            if "epoch" in new_state:
+                self.start_epoch = new_state.pop("epoch")
 
-                return self
-
-            else:
-                return super()._update_state(new_state)
+            return super()._update_state(new_state)
diff --git a/tests/io/test_torch.py b/tests/io/test_torch.py
@@ -25,14 +25,10 @@ def _build_model(in_channels, n_outputs):
                 torch.nn.Linear(64, n_outputs)
             )
 
-
     net = DummyNetwork(32, 1)
     torch_save_checkpoint("./model.pt", model=net)
-    # fails with weights_only=False only in pytest-mode not in normal execution
-    torch_load_checkpoint("./model.pt", weights_only=True)
+    assert torch_load_checkpoint("./model.pt")
 
-    torch_save_checkpoint("./model.pt", net, weights_only=True)
-    assert torch_load_checkpoint("./model.pt", weights_only=True)
 
 if __name__ == '__main__':
     test_load_save()