From ac2e47e4c25d3010b77f577e3c1301805c7d3968 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Mon, 12 Nov 2018 12:13:14 +0000 Subject: [PATCH 01/32] Starting VCL notebook --- .gitignore | 1 + .../Variational Continual Learning.ipynb | 323 ++++++++++++++++++ 2 files changed, 324 insertions(+) create mode 100644 examples/notebooks/Variational Continual Learning.ipynb diff --git a/.gitignore b/.gitignore index 4dccaa3..2bc8899 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,4 @@ docs/_autosummary docs/_build docs/generated docs/examples +venv diff --git a/examples/notebooks/Variational Continual Learning.ipynb b/examples/notebooks/Variational Continual Learning.ipynb new file mode 100644 index 0000000..c0c2078 --- /dev/null +++ b/examples/notebooks/Variational Continual Learning.ipynb @@ -0,0 +1,323 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import gzip\n", + "import sys\n", + "\n", + "import mxfusion as mf\n", + "import mxnet as mx\n", + "\n", + "import logging\n", + "logging.getLogger().setLevel(logging.DEBUG) # logging to stdout\n", + "\n", + "# Set the compute context, GPU is available otherwise CPU\n", + "ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class SplitMnistGenerator:\n", + " def __init__(self, data, batch_size):\n", + " self.data = data\n", + " self.batch_size = batch_size\n", + "\n", + " def __iter__(self):\n", + " for i in range(5):\n", + " idx_train_0 = np.where(self.data['train_label'] == i * 2)[0]\n", + " idx_train_1 = np.where(self.data['train_label'] == i * 2 + 1)[0]\n", + " idx_test_0 = np.where(self.data['test_label'] == i * 2)[0]\n", + " idx_test_1 = np.where(self.data['test_label'] == i * 2 + 1)[0]\n", + " \n", + " x_train = np.vstack((self.data['train_data'][idx_train_0], self.data['train_data'][idx_train_1]))\n", + " y_train = np.vstack((np.ones((idx_train_0.shape[0], 1)), -np.ones((idx_train_1.shape[0], 1))))\n", + "\n", + " x_test = np.vstack((self.data['test_data'][idx_test_0], self.data['test_data'][idx_test_1]))\n", + " y_test = np.vstack((np.ones((idx_test_0.shape[0], 1)), -np.ones((idx_test_1.shape[0], 1))))\n", + " \n", + " batch_size = x_train.shape[0] if self.batch_size is None else self.batch_size \n", + " train_iter = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)\n", + "\n", + " batch_size = x_test.shape[0] if self.batch_size is None else self.batch_size \n", + " test_iter = mx.io.NDArrayIter(x_test, y_test, batch_size)\n", + " \n", + " yield train_iter, test_iter\n", + " return\n", + "\n", + "mnist = mx.test_utils.get_mnist()\n", + "in_dim = np.prod(mnist['train_data'][0].shape)\n", + "\n", + "gen = SplitMnistGenerator(mnist, batch_size=None)\n", + "for task_id, (train, test) in enumerate(gen):\n", + " print(\"Task\", task_id)\n", + " print(\"Train data shape\" ,train.data[0][1].shape)\n", + " print(\"Train label shape\" ,train.label[0][1].shape)\n", + " print(\"Test data shape\" ,test.data[0][1].shape)\n", + " print(\"Test label shape\" ,test.label[0][1].shape)\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def rand_from_batch(x_coreset, y_coreset, x_train, y_train, coreset_size):\n", + " \"\"\" Random coreset selection \"\"\"\n", + " # Randomly select from (x_train, y_train) and add to current coreset (x_coreset, y_coreset)\n", + " idx = np.random.choice(x_train.shape[0], coreset_size, False)\n", + " x_coreset.append(x_train[idx,:])\n", + " y_coreset.append(y_train[idx,:])\n", + " x_train = np.delete(x_train, idx, axis=0)\n", + " y_train = np.delete(y_train, idx, axis=0)\n", + " return x_coreset, y_coreset, x_train, y_train \n", + "\n", + "def k_center(x_coreset, y_coreset, x_train, y_train, coreset_size):\n", + " \"\"\" K-center coreset selection \"\"\"\n", + " # Select K centers from (x_train, y_train) and add to current coreset (x_coreset, y_coreset)\n", + " dists = np.full(x_train.shape[0], np.inf)\n", + " current_id = 0\n", + " dists = update_distance(dists, x_train, current_id)\n", + " idx = [ current_id ]\n", + "\n", + " for i in range(1, coreset_size):\n", + " current_id = np.argmax(dists)\n", + " dists = update_distance(dists, x_train, current_id)\n", + " idx.append(current_id)\n", + "\n", + " x_coreset.append(x_train[idx,:])\n", + " y_coreset.append(y_train[idx,:])\n", + " x_train = np.delete(x_train, idx, axis=0)\n", + " y_train = np.delete(y_train, idx, axis=0)\n", + " return x_coreset, y_coreset, x_train, y_train\n", + "\n", + "def update_distance(dists, x_train, current_id):\n", + " for i in range(x_train.shape[0]):\n", + " current_dist = np.linalg.norm(x_train[i,:]-x_train[current_id,:])\n", + " dists[i] = np.minimum(current_dist, dists[i])\n", + " return dists" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_vcl(network_shape, no_epochs, data_gen, coreset_method, coreset_size=0, batch_size=None, single_head=True):\n", + " x_coresets, y_coresets = [], []\n", + " x_testsets, y_testsets = [], []\n", + "\n", + " all_acc = np.array([])\n", + "\n", + " for task_id, (train, test) in enumerate(data_gen):\n", + " x_testsets.append(test.data[0][1])\n", + " y_testsets.append(test.label[0][1])\n", + "\n", + " # Set the readout head to train\n", + " head = 0 if single_head else task_id\n", + " # bsize = x_train.shape[0] if (batch_size is None) else batch_size\n", + "\n", + " # Train network with maximum likelihood to initialize first model\n", + " if task_id == 0:\n", + " ml_model = VanillaNN(network_shape)\n", + " ml_model.train(x_train, y_train, task_id, no_epochs, bsize)\n", + " mf_weights = ml_model.get_weights()\n", + " mf_variances = None\n", + " ml_model.close_session()\n", + "\n", + " # Select coreset if needed\n", + " if coreset_size > 0:\n", + " x_coresets, y_coresets, x_train, y_train = coreset_method(x_coresets, y_coresets, x_train, y_train, coreset_size)\n", + "\n", + " # Train on non-coreset data\n", + " mf_model = MFVINN(network_shape, prev_means=mf_weights, prev_log_variances=mf_variances)\n", + " mf_model.train(x_train, y_train, head, no_epochs, bsize)\n", + " mf_weights, mf_variances = mf_model.get_weights()\n", + "\n", + " # Incorporate coreset data and make prediction\n", + " acc = utils.get_scores(mf_model, x_testsets, y_testsets, x_coresets, y_coresets, hidden_size, no_epochs, single_head, batch_size)\n", + " all_acc = utils.concatenate_results(acc, all_acc)\n", + "\n", + " mf_model.close_session()\n", + "\n", + " return all_acc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class BaseNN:\n", + " def __init__(self, network_shape):\n", + " # input and output placeholders\n", + " self.task_idx = mx.sym.Variable(name='task_idx', dtype=np.float32)\n", + " self.net = None\n", + " \n", + " def train(self, train_iter, val_iter, ctx):\n", + " # data = mx.sym.var('data')\n", + " # Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height)\n", + "# data = mx.sym.flatten(data=data)\n", + " \n", + " # create a trainable module on compute context\n", + " self.model = mx.mod.Module(symbol=self.net, context=ctx)\n", + " self.model.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)\n", + " init = mx.init.Xavier(factor_type=\"in\", magnitude=2.34)\n", + " self.model.init_params(initializer=init, force_init=True)\n", + " self.model.fit(train_iter, # train data\n", + " eval_data=val_iter, # validation data\n", + " optimizer='adam', # use SGD to train\n", + " optimizer_params={'learning_rate': 0.001}, # use fixed learning rate\n", + " eval_metric='acc', # report accuracy during training\n", + " batch_end_callback = mx.callback.Speedometer(batch_size, 100), # output progress for each 100 data batches\n", + " num_epoch=10) # train for at most 50 dataset passes\n", + " # predict accuracy of mlp\n", + " acc = mx.metric.Accuracy()\n", + " self.model.score(test_iter, acc)\n", + " return acc\n", + "\n", + " def prediction_prob(self, test_iter, task_idx):\n", + " # task_idx??\n", + " prob = self.model.predict(test_iter)\n", + " return prob\n", + "\n", + "def log_loss(output, y):\n", + " yhat = logistic(output)\n", + " return - nd.nansum(y * nd.log(yhat) + (1 - y) * nd.log(1 - yhat))\n", + " \n", + "class VanillaNN(BaseNN):\n", + " def __init__(self, network_shape, prev_weights=None, learning_rate=0.001):\n", + " super(VanillaNN, self).__init__(network_shape)\n", + "\n", + " # Create net\n", + " net = mx.gluon.nn.HybridSequential(prefix='vanilla_')\n", + " with net.name_scope():\n", + " for layer in network_shape[1:-1]:\n", + " net.add(mx.gluon.nn.Dense(layer, activation=\"relu\"))\n", + " # Last layer for classification\n", + " net.add(mx.gluon.nn.Dense(network_shape[-1], flatten=True, in_units=network_shape[-2]))\n", + " \n", + " net.initialize(mx.init.Xavier(magnitude=2.34))\n", + " \n", + "# for layer in network_shape[1:-1]:\n", + "# fc = mx.sym.FullyConnected(data=data, num_hidden=layer)\n", + "# act = mx.sym.Activation(data=fc, act_type=\"relu\")\n", + "# data = act\n", + "\n", + "# # Last layer is 1D for binary classifiers\n", + "# fc = mx.sym.FullyConnected(data=act, num_hidden=network_shape[-1])\n", + "# act = mx.sym.Activation(data=fc, act_type=\"relu\")\n", + "# # Log loss\n", + "# self.net = log_loss(act)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Hyperparameters\n", + "network_shape = (in_dim, 256, 256, 2) # binary classification\n", + "batch_size = None\n", + "no_epochs = 120\n", + "single_head = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run vanilla VCL\n", + "mx.random.seed(42)\n", + "np.random.seed(42)\n", + "\n", + "coreset_size = 0\n", + "data_gen = SplitMnistGenerator(mnist, batch_size)\n", + "vcl_result = run_vcl(network_shape, no_epochs, data_gen, rand_from_batch, coreset_size, batch_size, single_head)\n", + "print(vcl_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run random coreset VCL\n", + "mx.random.seed(42)\n", + "np.random.seed(42)\n", + "\n", + "coreset_size = 40\n", + "data_gen = SplitMnistGenerator(mnist, batch_size)\n", + "rand_vcl_result = vcl.run_vcl(hidden_size, no_epochs, data_gen, \n", + " coreset.rand_from_batch, coreset_size, batch_size, single_head)\n", + "print(rand_vcl_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run k-center coreset VCL\n", + "mx.random.seed(42)\n", + "np.random.seed(42)\n", + "\n", + "data_gen = SplitMnistGenerator(mnist, batch_size)\n", + "kcen_vcl_result = vcl.run_vcl(hidden_size, no_epochs, data_gen, \n", + " coreset.k_center, coreset_size, batch_size, single_head)\n", + "print(kcen_vcl_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot average accuracy\n", + "vcl_avg = np.nanmean(vcl_result, 1)\n", + "rand_vcl_avg = np.nanmean(rand_vcl_result, 1)\n", + "kcen_vcl_avg = np.nanmean(kcen_vcl_result, 1)\n", + "utils.plot('results/split.jpg', vcl_avg, rand_vcl_avg, kcen_vcl_avg)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9ce178b5d9b86ed8b4afd363d2ee3f2ca16a95ac Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Mon, 12 Nov 2018 14:34:54 +0000 Subject: [PATCH 02/32] Simplified NN code --- .../Variational Continual Learning.ipynb | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/examples/notebooks/Variational Continual Learning.ipynb b/examples/notebooks/Variational Continual Learning.ipynb index c0c2078..58a5c67 100644 --- a/examples/notebooks/Variational Continual Learning.ipynb +++ b/examples/notebooks/Variational Continual Learning.ipynb @@ -192,35 +192,20 @@ " # task_idx??\n", " prob = self.model.predict(test_iter)\n", " return prob\n", - "\n", - "def log_loss(output, y):\n", - " yhat = logistic(output)\n", - " return - nd.nansum(y * nd.log(yhat) + (1 - y) * nd.log(1 - yhat))\n", " \n", "class VanillaNN(BaseNN):\n", " def __init__(self, network_shape, prev_weights=None, learning_rate=0.001):\n", " super(VanillaNN, self).__init__(network_shape)\n", "\n", " # Create net\n", - " net = mx.gluon.nn.HybridSequential(prefix='vanilla_')\n", - " with net.name_scope():\n", + " self.net = mx.gluon.nn.HybridSequential(prefix='vanilla_')\n", + " with self.net.name_scope():\n", " for layer in network_shape[1:-1]:\n", - " net.add(mx.gluon.nn.Dense(layer, activation=\"relu\"))\n", + " self.net.add(mx.gluon.nn.Dense(layer, activation=\"relu\"))\n", " # Last layer for classification\n", - " net.add(mx.gluon.nn.Dense(network_shape[-1], flatten=True, in_units=network_shape[-2]))\n", - " \n", - " net.initialize(mx.init.Xavier(magnitude=2.34))\n", - " \n", - "# for layer in network_shape[1:-1]:\n", - "# fc = mx.sym.FullyConnected(data=data, num_hidden=layer)\n", - "# act = mx.sym.Activation(data=fc, act_type=\"relu\")\n", - "# data = act\n", - "\n", - "# # Last layer is 1D for binary classifiers\n", - "# fc = mx.sym.FullyConnected(data=act, num_hidden=network_shape[-1])\n", - "# act = mx.sym.Activation(data=fc, act_type=\"relu\")\n", - "# # Log loss\n", - "# self.net = log_loss(act)" + " self.net.add(mx.gluon.nn.Dense(network_shape[-1], flatten=True, in_units=network_shape[-2]))\n", + " self.loss = mx.gluon.loss.SoftmaxCrossEntropyLoss()\n", + " self.net.initialize(mx.init.Xavier(magnitude=2.34))" ] }, { From de7f7ec50ce26d8383924a583534478622cee06a Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Mon, 12 Nov 2018 17:03:13 +0000 Subject: [PATCH 03/32] Start of example code for VCL --- .../coresets.py | 79 +++++++++ .../variational_continual_learning/mnist.py | 31 ++++ .../variational_continual_learning/nets.py | 60 +++++++ .../variational_continual_learning.py | 165 ++++++++++++++++++ 4 files changed, 335 insertions(+) create mode 100644 examples/variational_continual_learning/coresets.py create mode 100644 examples/variational_continual_learning/mnist.py create mode 100644 examples/variational_continual_learning/nets.py create mode 100644 examples/variational_continual_learning/variational_continual_learning.py diff --git a/examples/variational_continual_learning/coresets.py b/examples/variational_continual_learning/coresets.py new file mode 100644 index 0000000..2859b60 --- /dev/null +++ b/examples/variational_continual_learning/coresets.py @@ -0,0 +1,79 @@ +import numpy as np +from abc import ABC, abstractmethod + + +class Coreset(ABC): + """ + Abstract base class for coresets + """ + def __init__(self, coreset_size): + """ + Initialise the coreset + :param coreset_size: Size of the coreset + :type coreset_size: int + """ + self.coreset_size = coreset_size + self.data = [] + self.labels = [] + + @abstractmethod + def update(self, data, labels): + pass + + @staticmethod + def get_merged(coresets): + """ + Get merged data and labels from the list of coresets + :param coresets: list of coresets + :type coresets: list(Coreset) + :return: merged data and labels + """ + merged_data, merged_labels = coresets[0].data, coresets[0].labels + for i in range(1, len(coresets)): + merged_data = np.vstack((merged_data, coresets[i].data)) + merged_labels = np.vstack((merged_labels, coresets[i].labels)) + return merged_data, merged_labels + + +class Random(Coreset): + """ + Randomly select from (data, labels) and add to current coreset + """ + def update(self, data, labels): + idx = np.random.choice(data.shape[0], self.coreset_size, False) + self.data.append(data[idx, :]) + self.labels.append(labels[idx, :]) + data = np.delete(data, idx, axis=0) + labels = np.delete(labels, idx, axis=0) + return data, labels + + +class KCenter(Coreset): + """ + Select k centers from (data, labels) and add to current coreset + """ + def update(self, data, labels): + dists = np.full(data.shape[0], np.inf) + current_id = 0 + + # TODO: This looks horribly inefficient + dists = self.update_distance(dists, data, current_id) + idx = [current_id] + + for i in range(1, self.coreset_size): + current_id = np.argmax(dists) + dists = self.update_distance(dists, data, current_id) + idx.append(current_id) + + self.data.append(data[idx, :]) + self.labels.append(labels[idx, :]) + data = np.delete(data, idx, axis=0) + labels = np.delete(labels, idx, axis=0) + return data, labels + + @staticmethod + def update_distance(dists, data, current_id): + for i in range(data.shape[0]): + current_dist = np.linalg.norm(data[i, :] - data[current_id, :]) + dists[i] = np.minimum(current_dist, dists[i]) + return dists diff --git a/examples/variational_continual_learning/mnist.py b/examples/variational_continual_learning/mnist.py new file mode 100644 index 0000000..0bb1636 --- /dev/null +++ b/examples/variational_continual_learning/mnist.py @@ -0,0 +1,31 @@ +import mxnet as mx +import numpy as np + + +class SplitMnistGenerator: + def __init__(self, data, batch_size): + self.data = data + self.batch_size = batch_size + self.num_tasks = 5 + + def __iter__(self): + for i in range(self.num_tasks): + idx_train_0 = np.where(self.data['train_label'] == i * 2)[0] + idx_train_1 = np.where(self.data['train_label'] == i * 2 + 1)[0] + idx_test_0 = np.where(self.data['test_label'] == i * 2)[0] + idx_test_1 = np.where(self.data['test_label'] == i * 2 + 1)[0] + + x_train = np.vstack((self.data['train_data'][idx_train_0], self.data['train_data'][idx_train_1])) + y_train = np.vstack((np.ones((idx_train_0.shape[0], 1)), -np.ones((idx_train_1.shape[0], 1)))) + + x_test = np.vstack((self.data['test_data'][idx_test_0], self.data['test_data'][idx_test_1])) + y_test = np.vstack((np.ones((idx_test_0.shape[0], 1)), -np.ones((idx_test_1.shape[0], 1)))) + + batch_size = x_train.shape[0] if self.batch_size is None else self.batch_size + train_iter = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True) + + batch_size = x_test.shape[0] if self.batch_size is None else self.batch_size + test_iter = mx.io.NDArrayIter(x_test, y_test, batch_size) + + yield train_iter, test_iter + return diff --git a/examples/variational_continual_learning/nets.py b/examples/variational_continual_learning/nets.py new file mode 100644 index 0000000..bea34b6 --- /dev/null +++ b/examples/variational_continual_learning/nets.py @@ -0,0 +1,60 @@ +import mxnet as mx +import numpy as np +from abc import ABC + + +class BaseNN(ABC): + def __init__(self, network_shape): + # input and output placeholders + self.task_idx = mx.nd.array(name='task_idx', dtype=np.float32) + self.model = None + self.network_shape = network_shape + self.loss = None + + def train(self, train_iter, val_iter, batch_size, ctx): + # data = mx.sym.var('data') + # Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height) + # data = mx.sym.flatten(data=data) + + # # create a trainable module on compute context + # self.model = mx.mod.Module(symbol=self.net, context=ctx) + self.model.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label, ctx=ctx) + init = mx.init.Xavier(factor_type="in", magnitude=2.34) + self.model.init_params(initializer=init, force_init=True) + self.model.fit(train_iter, # train data + eval_data=val_iter, # validation data + optimizer='adam', # use SGD to train + optimizer_params={'learning_rate': 0.001}, # use fixed learning rate + eval_metric='acc', # report accuracy during training + batch_end_callback=mx.callback.Speedometer(batch_size, 100), + # output progress for each 100 data batches + num_epoch=10) # train for at most 50 dataset passes + # predict accuracy of mlp + acc = mx.metric.Accuracy() + self.model.score(val_iter, acc) + return acc + + def prediction_prob(self, test_iter, task_idx): + # TODO task_idx?? + prob = self.model.predict(test_iter) + return prob + + +class VanillaNN(BaseNN): + def __init__(self, network_shape, previous_weights=None): + super(VanillaNN, self).__init__(network_shape) + + # Create net + self.net = mx.gluon.nn.HybridSequential(prefix='vanilla_') + with self.net.name_scope(): + for layer in network_shape[1:-1]: + self.net.add(mx.gluon.nn.Dense(layer, activation="relu")) + #  Last layer for classification + self.net.add(mx.gluon.nn.Dense(network_shape[-1], flatten=True, in_units=network_shape[-2])) + self.loss = mx.gluon.loss.SoftmaxCrossEntropyLoss() + + +class MeanFieldNN(BaseNN): + def __init__(self, network_shape, prior_means, prior_log_variances): + super(MeanFieldNN, self).__init__(network_shape) + raise NotImplementedError diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py new file mode 100644 index 0000000..f1474d6 --- /dev/null +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -0,0 +1,165 @@ +import numpy as np +import gzip +import sys + +import mxfusion as mf +import mxnet as mx + +import matplotlib.pyplot as plt + +from examples.variational_continual_learning.mnist import SplitMnistGenerator +from examples.variational_continual_learning.nets import VanillaNN, MeanFieldNN +from examples.variational_continual_learning.coresets import Random, KCenter, Coreset + +import logging +logging.getLogger().setLevel(logging.DEBUG) # logging to stdout + +# Set the compute context, GPU is available otherwise CPU +ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu() + + +def set_seeds(seed=42): + mx.random.seed(seed) + np.random.seed(seed) + + +def plot(filename, vcl, rand_vcl, kcen_vcl): + plt.rc('text', usetex=True) + plt.rc('font', family='serif') + + fig = plt.figure(figsize=(7,3)) + ax = plt.gca() + plt.plot(np.arange(len(vcl))+1, vcl, label='VCL', marker='o') + plt.plot(np.arange(len(rand_vcl))+1, rand_vcl, label='VCL + Random Coreset', marker='o') + plt.plot(np.arange(len(kcen_vcl))+1, kcen_vcl, label='VCL + K-center Coreset', marker='o') + ax.set_xticks(range(1, len(vcl)+1)) + ax.set_ylabel('Average accuracy') + ax.set_xlabel('\# tasks') + ax.legend() + + fig.savefig(filename, bbox_inches='tight') + plt.close() + + +class Experiment: + def __init__(self, network_shape, num_epochs, data_generator, + coreset_func, batch_size, single_head): + self.network_shape = network_shape + self.num_epochs = num_epochs + self.data_generator = data_generator + self.coresets = dict((i, coreset_func()) for i in range(gen.num_tasks)) + self.batch_size = batch_size + self.single_head = single_head + self.overall_accuracy = np.array([]) + self.x_test_sets = [] + self.y_test_sets = [] + + def run(self): + self.x_test_sets = [] + self.y_test_sets = [] + + for task_id, (train_iter, test_iter) in enumerate(self.data_generator): + self.x_test_sets.append(test_iter.data[0][1]) + self.y_test_sets.append(test_iter.label[0][1]) + + # Set the readout head to train_iter + head = 0 if self.single_head else task_id + + mean_field_weights = None + mean_field_variances = None + + # Train network with maximum likelihood to initialize first model + if task_id == 0: + vanilla_model = VanillaNN(nn_shape) + vanilla_model.train(train_iter, task_id, self.num_epochs, self.batch_size) + mean_field_weights = vanilla_model.get_weights() + + # Train on non-coreset data + mean_field_model = MeanFieldNN( + nn_shape, prior_means=mean_field_weights, prior_log_variances=mean_field_variances) + mean_field_model.train(train_iter, head, self.num_epochs, self.batch_size) + mean_field_weights, mean_field_variances = mean_field_model.get_weights() + + # Incorporate coreset data and make prediction + acc = self.get_scores(mean_field_model) + self.overall_accuracy = self.concatenate_results(acc, self.overall_accuracy) + + def get_scores(self, model): + mf_weights, mf_variances = model.get_weights() + acc = [] + final_model = None + + if self.single_head: + if len(self.coresets) > 0: + x_train, y_train = Coreset.merge(self.coreset) + bsize = x_train.shape[0] if (batch_size is None) else batch_size + final_model = MeanFieldNN(x_train.shape[1], hidden_size, y_train.shape[1], x_train.shape[0], + prev_means=mf_weights, prev_log_variances=mf_variances) + final_model.train(x_train, y_train, 0, no_epochs, bsize) + else: + final_model = model + + for i in range(len(x_testsets)): + if not single_head: + if len(x_coresets) > 0: + x_train, y_train = x_coresets[i], y_coresets[i] + bsize = x_train.shape[0] if (batch_size is None) else batch_size + final_model = MeanFieldNN(x_train.shape[1], hidden_size, y_train.shape[1], x_train.shape[0], + prev_means=mf_weights, prev_log_variances=mf_variances) + final_model.train(x_train, y_train, i, no_epochs, bsize) + else: + final_model = model + + head = 0 if single_head else i + x_test, y_test = x_testsets[i], y_testsets[i] + + pred = final_model.prediction_prob(x_test, head) + pred_mean = np.mean(pred, axis=0) + pred_y = np.argmax(pred_mean, axis=1) + y = np.argmax(y_test, axis=1) + cur_acc = len(np.where((pred_y - y) == 0)[0]) * 1.0 / y.shape[0] + acc.append(cur_acc) + + if len(x_coresets) > 0 and not single_head: + final_model.close_session() + + if len(x_coresets) > 0 and single_head: + final_model.close_session() + + return acc + + @staticmethod + def concatenate_results(score, all_score): + if all_score.size == 0: + all_score = np.reshape(score, (1, -1)) + else: + new_arr = np.empty((all_score.shape[0], all_score.shape[1]+1)) + new_arr[:] = np.nan + new_arr[:,:-1] = all_score + all_score = np.vstack((new_arr, score)) + return all_score + + +if __name__ == "__main__": + # Load data + data = mx.test_utils.get_mnist() + input_dim = np.prod(data['train_data'][0].shape) # Note the data will get flattened later + gen = SplitMnistGenerator(data, batch_size=None) + + nn_shape = (input_dim, 256, 256, 2) # binary classification + experiments = dict( + vanilla=dict(coreset_func=lambda: Random(coreset_size=0), + network_shape=nn_shape, num_epochs=120, single_head=False), + random=dict(coreset_func=lambda: Random(coreset_size=40), + network_shape=nn_shape, num_epochs=120, single_head=False), + k_center=dict(coreset_func=lambda: KCenter(coreset_size=40), + network_shape=nn_shape, num_epochs=120, single_head=False) + ) + + # Run experiments + for name, params in experiments.items(): + print("Running experiment", name) + set_seeds() + experiment = Experiment(batch_size=None, data_generator=gen, **params) + experiment.run() + print(experiment.overall_accuracy) From 97a48d8dbf01ea60e89a396ddcf0129a3a39a0f9 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Tue, 20 Nov 2018 11:31:26 +0000 Subject: [PATCH 04/32] Refactored code out of the main function into classes Runnable code (results not great! - requires debugging) Added callback functions for custom print statements in batch_loop and grad_based_inference --- .../coresets.py | 138 ++++++++-- .../experiment.py | 171 ++++++++++++ .../variational_continual_learning/mnist.py | 75 +++++- .../variational_continual_learning/models.py | 245 ++++++++++++++++++ .../variational_continual_learning/nets.py | 60 ----- .../variational_continual_learning.py | 203 ++++++--------- mxfusion/inference/batch_loop.py | 10 +- mxfusion/inference/grad_based_inference.py | 10 +- 8 files changed, 678 insertions(+), 234 deletions(-) create mode 100644 examples/variational_continual_learning/experiment.py create mode 100644 examples/variational_continual_learning/models.py delete mode 100644 examples/variational_continual_learning/nets.py diff --git a/examples/variational_continual_learning/coresets.py b/examples/variational_continual_learning/coresets.py index 2859b60..a8cd681 100644 --- a/examples/variational_continual_learning/coresets.py +++ b/examples/variational_continual_learning/coresets.py @@ -1,24 +1,74 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ============================================================================== + import numpy as np -from abc import ABC, abstractmethod +import mxnet as mx +from mxnet.io import NDArrayIter, DataIter, DataBatch +from abc import ABCMeta, abstractmethod + + +class MultiIter(DataIter): + def __init__(self, iter_list): + super().__init__() + self.iterators = [] if iter_list is None else iter_list + + def __next__(self): + if len(self.iterators) == 0: + raise StopIteration + + if len(self.iterators) == 1: + return next(self.iterators[0]) + + data = [] + labels = [] + for iterator in self.iterators: + batch = next(iterator) + data.append(batch.data) + labels.append(batch.label) + return DataBatch(data=mx.nd.concat(*data, axis=0), label=mx.nd.concat(*labels, axis=0), pad=0) + def __len__(self): + return len(self.iterators) -class Coreset(ABC): + def reset(self): + for i in self.iterators: + i.reset() + + @property + def provide_data(self): + return [b for b in self.iterators[0].provide_data] + [b for b in self.iterators[1].provide_data] + + @property + def provide_label(self): + return [b for b in self.iterators[0].provide_label] + [b for b in self.iterators[1].provide_label] + + def append(self, iterator): + if not isinstance(iterator, (DataIter, NDArrayIter)): + raise ValueError("Expected either a DataIter or NDArray object, received: {}".format(type(iterator))) + self.iterators.append(iterator) + + +class Coreset(metaclass=ABCMeta): """ Abstract base class for coresets """ - def __init__(self, coreset_size): + def __init__(self): """ Initialise the coreset - :param coreset_size: Size of the coreset - :type coreset_size: int """ - self.coreset_size = coreset_size - self.data = [] - self.labels = [] - - @abstractmethod - def update(self, data, labels): - pass + self.iterator = None + self.reset() @staticmethod def get_merged(coresets): @@ -34,25 +84,64 @@ def get_merged(coresets): merged_labels = np.vstack((merged_labels, coresets[i].labels)) return merged_data, merged_labels + @abstractmethod + def selector(self, data): + pass + + def update(self, iterator): + data, labels = iterator.data[0][1].asnumpy(), iterator.label[0][1].asnumpy() + idx = self.selector(data) + self.iterator.append(NDArrayIter(data=data[idx, :], label=labels[idx], shuffle=False, batch_size=len(idx))) + + data = np.delete(data, idx, axis=0) + labels = np.delete(labels, idx, axis=0) + batch_size = min(iterator.batch_size, data.shape[0]) + + return NDArrayIter(data=data, label=labels, shuffle=False, batch_size=batch_size) + + def reset(self): + self.iterator = MultiIter([]) + + +class Vanilla(Coreset): + """ + Vanilla coreset that is always size 0 + """ + def __init__(self): + super().__init__() + self.coreset_size = 0 + + def update(self, iterator): + return iterator + + def selector(self, data): + raise NotImplementedError + class Random(Coreset): """ Randomly select from (data, labels) and add to current coreset """ - def update(self, data, labels): - idx = np.random.choice(data.shape[0], self.coreset_size, False) - self.data.append(data[idx, :]) - self.labels.append(labels[idx, :]) - data = np.delete(data, idx, axis=0) - labels = np.delete(labels, idx, axis=0) - return data, labels + def __init__(self, coreset_size): + """ + Initialise the coreset + :param coreset_size: Size of the coreset + :type coreset_size: int + """ + super().__init__() + if coreset_size == 0: + raise ValueError("Coreset size should be > 0") + self.coreset_size = coreset_size + + def selector(self, data): + return np.random.choice(data.shape[0], self.coreset_size, False) -class KCenter(Coreset): +class KCenter(Random): """ Select k centers from (data, labels) and add to current coreset """ - def update(self, data, labels): + def selector(self, data): dists = np.full(data.shape[0], np.inf) current_id = 0 @@ -64,12 +153,7 @@ def update(self, data, labels): current_id = np.argmax(dists) dists = self.update_distance(dists, data, current_id) idx.append(current_id) - - self.data.append(data[idx, :]) - self.labels.append(labels[idx, :]) - data = np.delete(data, idx, axis=0) - labels = np.delete(labels, idx, axis=0) - return data, labels + return idx @staticmethod def update_distance(dists, data, current_id): diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py new file mode 100644 index 0000000..bcd7a5b --- /dev/null +++ b/examples/variational_continual_learning/experiment.py @@ -0,0 +1,171 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ============================================================================== + +import numpy as np + +from examples.variational_continual_learning.models import VanillaNN, BayesianNN +from examples.variational_continual_learning.coresets import Coreset + + +class Experiment: + def __init__(self, network_shape, num_epochs, learning_rate, optimizer, data_generator, + coreset, batch_size, single_head, ctx): + self.network_shape = network_shape + self.num_epochs = num_epochs + self.learning_rate = learning_rate + self.optimizer = optimizer + self.data_generator = data_generator + self.coreset = coreset + self.batch_size = batch_size + self.single_head = single_head + self.context = ctx + + # The following are to keep lint happy: + self.overall_accuracy = None + self.test_iterators = None + self.vanilla_model = None + self.bayesian_model = None + self.prediction_model = None + + self.reset() + + def reset(self): + self.coreset.reset() + self.overall_accuracy = np.array([]) + self.test_iterators = dict() + + model_params = dict( + network_shape=self.network_shape, + learning_rate=self.learning_rate, + optimizer=self.optimizer, + max_iter=self.num_epochs, + ctx=self.context + ) + + self.vanilla_model = VanillaNN(**model_params) + self.bayesian_model = BayesianNN(**model_params) + self.prediction_model = BayesianNN(**model_params) + + def run(self, verbose=True): + self.reset() + + # To begin with, set the priors to None. + # We will in fact use the results of maximum likelihood as the first prior + priors = None + + for task_id, (train_iterator, test_iterator) in enumerate(self.data_generator): + print("Task: ", task_id) + self.test_iterators[task_id] = test_iterator + + # Set the readout head to train_iterator + head = 0 if self.single_head else task_id + + # Update the coreset, and update the train iterator to remove the coreset data + train_iterator = self.coreset.update(train_iterator) + + batch_size = train_iterator.provide_label[0].shape[0] if self.batch_size is None else self.batch_size + + # Train network with maximum likelihood to initialize first model + if task_id == 0: + # TODO: test_iterator should be val_iter + print("Training vanilla neural network as starting point") + self.vanilla_model.train( + train_iterator=train_iterator, + validation_iterator=test_iterator, + task_id=task_id, + epochs=5, + batch_size=batch_size, + verbose=verbose) + + priors = self.vanilla_model.net.collect_params() + train_iterator.reset() + + # Train on non-coreset data + # TODO: test_iterator should be val_iter + print("Training main model") + self.bayesian_model.train( + train_iterator=train_iterator, + validation_iterator=test_iterator, + task_id=head, + epochs=self.num_epochs, + batch_size=self.batch_size, + priors=priors) + + # Set the priors for the next round of inference to be the current posteriors + priors = self.bayesian_model.posteriors + + # Incorporate coreset data and make prediction + acc = self.get_scores() + print(f"Accuracy for task {task_id}: {acc:.3f}") + self.overall_accuracy = self.concatenate_results(acc, self.overall_accuracy) + + def get_scores(self): + acc = [] + prediction_model = self.prediction_model + + if self.single_head: + if len(self.coreset.iterator) > 0: + train_iterator = Coreset.merge(self.coreset) + batch_size = train_iterator.provide_label.shape[0] if (self.batch_size is None) else self.batch_size + priors = self.bayesian_model.posteriors + print("Training single-head prediction model") + prediction_model.train( + train_iterator=train_iterator, + validation_iterator=None, + task_id=0, + epochs=self.num_epochs, + batch_size=batch_size, + priors=priors) + else: + print("Using main model as prediction model") + prediction_model = self.bayesian_model + + for task_id, test_iterator in self.test_iterators.items(): + test_iterator.reset() + if not self.single_head: + # TODO: What's the validation data here? + # TODO: different learning rate and max iter here? + if len(self.coreset.iterator) > 0: + print("Training multi-head prediction model") + prediction_model.train( + train_iterator=self.coreset.iterator, + validation_iterator=None, + task_id=task_id, + epochs=self.num_epochs, + batch_size=self.batch_size, + priors=self.bayesian_model.posteriors) + else: + print("Using main model as prediction model") + prediction_model = self.bayesian_model + + head = 0 if self.single_head else task_id + + predictions = prediction_model.prediction_prob(test_iterator, head) + predicted_means = np.mean(predictions, axis=0) + predicted_labels = np.argmax(predicted_means, axis=1) + test_labels = test_iterator.label[0][1].asnumpy() + cur_acc = len(np.where((predicted_labels - test_labels) == 0)[0]) * 1.0 / test_labels.shape[0] + acc.append(cur_acc) + return acc + + @staticmethod + def concatenate_results(score, all_score): + if all_score.size == 0: + all_score = np.reshape(score, (1, -1)) + else: + new_arr = np.empty((all_score.shape[0], all_score.shape[1] + 1)) + new_arr[:] = np.nan + new_arr[:, :-1] = all_score + all_score = np.vstack((new_arr, score)) + return all_score diff --git a/examples/variational_continual_learning/mnist.py b/examples/variational_continual_learning/mnist.py index 0bb1636..247e690 100644 --- a/examples/variational_continual_learning/mnist.py +++ b/examples/variational_continual_learning/mnist.py @@ -1,31 +1,82 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ============================================================================== + import mxnet as mx import numpy as np +from mxnet.io import NDArrayIter -class SplitMnistGenerator: - def __init__(self, data, batch_size): +class SplitTaskGenerator: + def __init__(self, data, batch_size, tasks): self.data = data self.batch_size = batch_size - self.num_tasks = 5 + self.tasks = tasks def __iter__(self): - for i in range(self.num_tasks): - idx_train_0 = np.where(self.data['train_label'] == i * 2)[0] - idx_train_1 = np.where(self.data['train_label'] == i * 2 + 1)[0] - idx_test_0 = np.where(self.data['test_label'] == i * 2)[0] - idx_test_1 = np.where(self.data['test_label'] == i * 2 + 1)[0] + for task in self.tasks: + idx_train_0 = np.where(self.data['train_label'] == task[0])[0] + idx_train_1 = np.where(self.data['train_label'] == task[1])[0] + idx_test_0 = np.where(self.data['test_label'] == task[0])[0] + idx_test_1 = np.where(self.data['test_label'] == task[1])[0] + # TODO: Validation data x_train = np.vstack((self.data['train_data'][idx_train_0], self.data['train_data'][idx_train_1])) - y_train = np.vstack((np.ones((idx_train_0.shape[0], 1)), -np.ones((idx_train_1.shape[0], 1)))) + y_train = np.hstack((np.ones((idx_train_0.shape[0],)), np.zeros((idx_train_1.shape[0],)))) x_test = np.vstack((self.data['test_data'][idx_test_0], self.data['test_data'][idx_test_1])) - y_test = np.vstack((np.ones((idx_test_0.shape[0], 1)), -np.ones((idx_test_1.shape[0], 1)))) + y_test = np.hstack((np.ones((idx_test_0.shape[0],)), np.zeros((idx_test_1.shape[0],)))) batch_size = x_train.shape[0] if self.batch_size is None else self.batch_size - train_iter = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True) + train_iter = NDArrayIter(x_train, y_train, batch_size, shuffle=True) batch_size = x_test.shape[0] if self.batch_size is None else self.batch_size - test_iter = mx.io.NDArrayIter(x_test, y_test, batch_size) + test_iter = NDArrayIter(x_test, y_test, batch_size) yield train_iter, test_iter return + + +# class SplitMnistGenerator: +# def __init__(self, data, batch_size): +# self.data = data +# self.batch_size = batch_size +# self.num_tasks = 5 +# +# def __iter__(self): +# for i in range(self.num_tasks): +# idx_train_0 = np.where(self.data['train_label'] == i * 2)[0] +# idx_train_1 = np.where(self.data['train_label'] == i * 2 + 1)[0] +# idx_test_0 = np.where(self.data['test_label'] == i * 2)[0] +# idx_test_1 = np.where(self.data['test_label'] == i * 2 + 1)[0] +# +# # TODO: Validation data +# x_train = np.vstack((self.data['train_data'][idx_train_0], self.data['train_data'][idx_train_1])) +# y_train = np.hstack((np.ones((idx_train_0.shape[0],)), np.zeros((idx_train_1.shape[0],)))) +# +# x_test = np.vstack((self.data['test_data'][idx_test_0], self.data['test_data'][idx_test_1])) +# y_test = np.hstack((np.ones((idx_test_0.shape[0],)), np.zeros((idx_test_1.shape[0],)))) +# +# batch_size = x_train.shape[0] if self.batch_size is None else self.batch_size +# train_iter = NDArrayIter(x_train, y_train, batch_size, shuffle=True) +# +# batch_size = x_test.shape[0] if self.batch_size is None else self.batch_size +# test_iter = NDArrayIter(x_test, y_test, batch_size) +# +# yield train_iter, test_iter +# return + + +class SplittableIterator(NDArrayIter): + def __init__(self, data): + super().__init__(data) diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py new file mode 100644 index 0000000..1dda04d --- /dev/null +++ b/examples/variational_continual_learning/models.py @@ -0,0 +1,245 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ============================================================================== +import mxnet as mx +from mxnet.gluon import Trainer, ParameterDict +from mxnet.gluon.loss import SoftmaxCrossEntropyLoss +from mxnet.gluon.nn import HybridSequential, Dense +from mxnet.initializer import Xavier +from mxnet.metric import Accuracy + +from mxfusion import Model, Variable +from mxfusion.components import MXFusionGluonFunction +from mxfusion.components.distributions import Normal, Categorical +from mxfusion.inference import BatchInferenceLoop, create_Gaussian_meanfield, GradBasedInference, \ + StochasticVariationalInference, VariationalPosteriorForwardSampling +import numpy as np +from abc import ABC, abstractmethod + + +class BaseNN(ABC): + prefix = None + + def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): + self.task_idx = mx.nd.array([-1], dtype=np.float32) + self.model = None + self.network_shape = network_shape + self.learning_rate = learning_rate + self.optimizer = optimizer + self.max_iter = max_iter + self.loss = None + self.ctx = ctx + self.model = None + self.net = None + self.inference = None + self.create_net() + self.loss = SoftmaxCrossEntropyLoss() + + def create_net(self): + # Create net + self.net = HybridSequential(prefix=self.prefix) + with self.net.name_scope(): + for i in range(1, len(self.network_shape) - 1): + self.net.add(Dense(self.network_shape[i], activation="relu")) # , in_units=self.network_shape[i - 1])) + #  Last layer for classification + self.net.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) + self.net.initialize(Xavier(magnitude=2.34), ctx=self.ctx) + + def forward(self, data): + # Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height) + data = mx.nd.flatten(data).as_in_context(self.ctx) + output = self.net(data) + return output + + def evaluate_accuracy(self, data_iterator): + acc = Accuracy() + for i, batch in enumerate(data_iterator): + output = self.forward(batch.data[0]) + labels = batch.label[0].as_in_context(self.ctx) + predictions = mx.nd.argmax(output, axis=1) + acc.update(preds=predictions, labels=labels) + return acc.get()[1] + + @abstractmethod + def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs, priors=None, verbose=True): + raise NotImplementedError + + def prediction_prob(self, test_iter, task_idx): + # TODO task_idx?? + prob = self.model.predict(test_iter) + return prob + + def get_weights(self): + params = self.net.collect_params() + # weights = [params.get('dense{}_weight'.format(i)) for i in range(len(self.network_shape) - 1)] + # biases = [params.get('dense{}_bias'.format(i)) for i in range(len(self.network_shape) - 1)] + # return weights, biases + return params + + @staticmethod + def print_status(epoch, loss, train_accuracy, test_accuracy): + print("Epoch {:4d}. Loss: {:8.2f}, Train_acc {:.3f}, Test_acc {:.3f}".format( + epoch, loss, train_accuracy, test_accuracy)) + + +class VanillaNN(BaseNN): + prefix = 'vanilla_' + + def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs, priors=None, verbose=True): + trainer = Trainer(self.net.collect_params(), self.optimizer, dict(learning_rate=self.learning_rate)) + + num_examples = 0 + for epoch in range(epochs): + cumulative_loss = 0 + for i, batch in enumerate(train_iterator): + with mx.autograd.record(): + output = self.forward(batch.data[0]) + labels = batch.label[0].as_in_context(self.ctx) + loss = self.loss(output, labels) + loss.backward() + trainer.step(batch_size=batch_size, ignore_stale_grad=True) + cumulative_loss += mx.nd.sum(loss).asscalar() + num_examples += len(labels) + + train_iterator.reset() + validation_iterator.reset() + train_accuracy = self.evaluate_accuracy(train_iterator) + validation_accuracy = self.evaluate_accuracy(validation_iterator) + self.print_status(epoch, cumulative_loss / num_examples, train_accuracy, validation_accuracy) + + +class BayesianNN(BaseNN): + prefix = 'bayesian_' + + def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): + super().__init__(network_shape, learning_rate, optimizer, max_iter, ctx) + # self.prior_means = dict() + # self.prior_variances = dict() + self.create_model() + + def create_model(self): + self.model = Model() + self.model.N = Variable() + self.model.f = MXFusionGluonFunction(self.net, num_outputs=1, broadcastable=False) + self.model.x = Variable(shape=(self.model.N, int(self.network_shape[0]))) + self.model.r = self.model.f(self.model.x) + self.model.y = Categorical.define_variable(log_prob=self.model.r, shape=(self.model.N, 1), num_classes=2) + + for v in self.model.r.factor.parameters.values(): + # self.prior_means[v] = Variable(shape=v.shape) + # self.prior_variances[v] = Variable(shape=v.shape) + # v.set_prior(Normal(mean=self.prior_means[v], variance=self.prior_variances[v])) + means = Variable(shape=v.shape) + variances = Variable(shape=v.shape) + setattr(self.model, v.inherited_name + "_mean", means) + setattr(self.model, v.inherited_name + "_variance", variances) + v.set_prior(Normal(mean=means, variance=variances)) + + def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs, priors=None, verbose=True): + for i, batch in enumerate(train_iterator): + if i > 0: + raise NotImplementedError("Currently not supported for more than one batch of data. " + "Please switch to using the MinibatchInferenceLoop") + + data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) + labels = mx.nd.expand_dims(batch.label[0], axis=-1).as_in_context(self.ctx) + + # pass some data to initialise the net + self.net(data[:1]) + + # TODO: Would rather have done this before! + self.create_model() + + # Set the priors + # if priors is None: + # for v in self.model.r.factor.parameters.values(): + # v.set_prior(Normal(mean=mx.nd.array([0]), variance=mx.nd.array([3.]))) + # if isinstance(priors, ParameterDict): + # for v in self.model.r.factor.parameters.values(): + # short_name = v.inherited_name.partition(self.prefix)[-1] + # mean = priors.get(short_name).data() + # v.set_prior(Normal(mean=mean, variance=mx.nd.array([3.]))) + # else: + # pass + + observed = [self.model.x, self.model.y] + q = create_Gaussian_meanfield(model=self.model, observed=observed) + alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed) + self.inference = GradBasedInference(inference_algorithm=alg, grad_loop=BatchInferenceLoop()) + self.inference.initialize(y=labels, x=data) + + for v_name, v in self.model.r.factor.parameters.items(): + qv_mean = q[v.uuid].factor.mean + qv_variance = q[v.uuid].factor.variance + + # Initialization of mean/variances of NN weights + # TODO: Still needed? + a = self.inference.params[qv_variance].asnumpy() + a[:] = 1e-8 + self.inference.params[qv_mean] = self.net.collect_params()[v_name].data() + self.inference.params[qv_variance] = mx.nd.array(a) + + v_name_mean = v.inherited_name + "_mean" + v_name_variance = v.inherited_name + "_variance" + + if priors is None: + means = mx.nd.zeros(shape=v.shape) + variances = mx.nd.ones(shape=v.shape) * 3 + elif isinstance(priors, ParameterDict): + # This is a maximum likelihood estimate + short_name = v.inherited_name.partition(self.prefix)[-1] + means = priors.get(short_name).data() + variances = mx.nd.ones(shape=v.shape) * 3 + else: + # Use posteriors from previous round of inference + means = priors[v_name_mean] + variances = priors[v_name_variance] + + mean_prior = getattr(self.model, v_name_mean) + variance_prior = getattr(self.model, v_name_variance) + self.inference.params[mean_prior] = means + self.inference.params[variance_prior] = variances + + # Indicate that we don't want to perform inference over the priors + self.inference.params.param_dict[mean_prior]._grad_req = 'null' + self.inference.params.param_dict[variance_prior]._grad_req = 'null' + + callback = lambda epoch, loss: self.print_status(epoch, loss, float('nan'), float('nan')) + self.inference.run(max_iter=self.max_iter, learning_rate=self.learning_rate, + x=data, y=labels, verbose=False, callback=callback) + + @property + def posteriors(self): + q = self.inference.inference_algorithm.posterior + posteriors = dict() + for v_name, v in self.model.r.factor.parameters.items(): + posteriors[v.inherited_name + "_mean"] = self.inference.params[q[v.uuid].factor.mean].asnumpy() + posteriors[v.inherited_name + "_variance"] = self.inference.params[q[v.uuid].factor.variance].asnumpy() + return posteriors + + def prediction_prob(self, test_iter, task_idx): + if self.inference is None: + raise RuntimeError("Model not yet learnt") + + for i, batch in enumerate(test_iter): + if i > 0: + raise NotImplementedError("Currently not supported for more than one batch of data. " + "Please switch to using the MinibatchInferenceLoop") + + data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) + N, D = map(lambda x: mx.nd.array([x], ctx=self.ctx), data.shape) + + prediction_inference = VariationalPosteriorForwardSampling( + 10, [self.model.x], self.inference, [self.model.r]) + res = prediction_inference.run(x=mx.nd.array(data)) + return res[0].asnumpy() diff --git a/examples/variational_continual_learning/nets.py b/examples/variational_continual_learning/nets.py deleted file mode 100644 index bea34b6..0000000 --- a/examples/variational_continual_learning/nets.py +++ /dev/null @@ -1,60 +0,0 @@ -import mxnet as mx -import numpy as np -from abc import ABC - - -class BaseNN(ABC): - def __init__(self, network_shape): - # input and output placeholders - self.task_idx = mx.nd.array(name='task_idx', dtype=np.float32) - self.model = None - self.network_shape = network_shape - self.loss = None - - def train(self, train_iter, val_iter, batch_size, ctx): - # data = mx.sym.var('data') - # Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height) - # data = mx.sym.flatten(data=data) - - # # create a trainable module on compute context - # self.model = mx.mod.Module(symbol=self.net, context=ctx) - self.model.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label, ctx=ctx) - init = mx.init.Xavier(factor_type="in", magnitude=2.34) - self.model.init_params(initializer=init, force_init=True) - self.model.fit(train_iter, # train data - eval_data=val_iter, # validation data - optimizer='adam', # use SGD to train - optimizer_params={'learning_rate': 0.001}, # use fixed learning rate - eval_metric='acc', # report accuracy during training - batch_end_callback=mx.callback.Speedometer(batch_size, 100), - # output progress for each 100 data batches - num_epoch=10) # train for at most 50 dataset passes - # predict accuracy of mlp - acc = mx.metric.Accuracy() - self.model.score(val_iter, acc) - return acc - - def prediction_prob(self, test_iter, task_idx): - # TODO task_idx?? - prob = self.model.predict(test_iter) - return prob - - -class VanillaNN(BaseNN): - def __init__(self, network_shape, previous_weights=None): - super(VanillaNN, self).__init__(network_shape) - - # Create net - self.net = mx.gluon.nn.HybridSequential(prefix='vanilla_') - with self.net.name_scope(): - for layer in network_shape[1:-1]: - self.net.add(mx.gluon.nn.Dense(layer, activation="relu")) - #  Last layer for classification - self.net.add(mx.gluon.nn.Dense(network_shape[-1], flatten=True, in_units=network_shape[-2])) - self.loss = mx.gluon.loss.SoftmaxCrossEntropyLoss() - - -class MeanFieldNN(BaseNN): - def __init__(self, network_shape, prior_means, prior_log_variances): - super(MeanFieldNN, self).__init__(network_shape) - raise NotImplementedError diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index f1474d6..37e5045 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -1,21 +1,31 @@ -import numpy as np -import gzip -import sys +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ============================================================================== -import mxfusion as mf +import numpy as np import mxnet as mx import matplotlib.pyplot as plt -from examples.variational_continual_learning.mnist import SplitMnistGenerator -from examples.variational_continual_learning.nets import VanillaNN, MeanFieldNN -from examples.variational_continual_learning.coresets import Random, KCenter, Coreset +from examples.variational_continual_learning.experiment import Experiment +from examples.variational_continual_learning.mnist import SplitTaskGenerator +from examples.variational_continual_learning.coresets import Random, KCenter, Vanilla import logging logging.getLogger().setLevel(logging.DEBUG) # logging to stdout # Set the compute context, GPU is available otherwise CPU -ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu() +CTX = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu() def set_seeds(seed=42): @@ -23,143 +33,76 @@ def set_seeds(seed=42): np.random.seed(seed) -def plot(filename, vcl, rand_vcl, kcen_vcl): - plt.rc('text', usetex=True) - plt.rc('font', family='serif') - - fig = plt.figure(figsize=(7,3)) +def plot(title, experiments, tasks): + fig = plt.figure(figsize=(len(tasks), 3)) ax = plt.gca() - plt.plot(np.arange(len(vcl))+1, vcl, label='VCL', marker='o') - plt.plot(np.arange(len(rand_vcl))+1, rand_vcl, label='VCL + Random Coreset', marker='o') - plt.plot(np.arange(len(kcen_vcl))+1, kcen_vcl, label='VCL + K-center Coreset', marker='o') - ax.set_xticks(range(1, len(vcl)+1)) + + x = range(1, len(tasks) + 1) + + for experiment in experiments: + acc = np.nanmean(experiment.overall_accuracy, axis=1) + label = experiment.coreset.__class__.__name__ + plt.plot(x, acc, label=label, marker='o') + ax.set_xticks(x) ax.set_ylabel('Average accuracy') - ax.set_xlabel('\# tasks') + ax.set_xlabel('# tasks') ax.legend() + ax.set_title(title) + plt.show() + + filename = "vcl_{}.pdf".format(title) fig.savefig(filename, bbox_inches='tight') plt.close() -class Experiment: - def __init__(self, network_shape, num_epochs, data_generator, - coreset_func, batch_size, single_head): - self.network_shape = network_shape - self.num_epochs = num_epochs - self.data_generator = data_generator - self.coresets = dict((i, coreset_func()) for i in range(gen.num_tasks)) - self.batch_size = batch_size - self.single_head = single_head - self.overall_accuracy = np.array([]) - self.x_test_sets = [] - self.y_test_sets = [] - - def run(self): - self.x_test_sets = [] - self.y_test_sets = [] - - for task_id, (train_iter, test_iter) in enumerate(self.data_generator): - self.x_test_sets.append(test_iter.data[0][1]) - self.y_test_sets.append(test_iter.label[0][1]) - - # Set the readout head to train_iter - head = 0 if self.single_head else task_id - - mean_field_weights = None - mean_field_variances = None - - # Train network with maximum likelihood to initialize first model - if task_id == 0: - vanilla_model = VanillaNN(nn_shape) - vanilla_model.train(train_iter, task_id, self.num_epochs, self.batch_size) - mean_field_weights = vanilla_model.get_weights() - - # Train on non-coreset data - mean_field_model = MeanFieldNN( - nn_shape, prior_means=mean_field_weights, prior_log_variances=mean_field_variances) - mean_field_model.train(train_iter, head, self.num_epochs, self.batch_size) - mean_field_weights, mean_field_variances = mean_field_model.get_weights() - - # Incorporate coreset data and make prediction - acc = self.get_scores(mean_field_model) - self.overall_accuracy = self.concatenate_results(acc, self.overall_accuracy) - - def get_scores(self, model): - mf_weights, mf_variances = model.get_weights() - acc = [] - final_model = None - - if self.single_head: - if len(self.coresets) > 0: - x_train, y_train = Coreset.merge(self.coreset) - bsize = x_train.shape[0] if (batch_size is None) else batch_size - final_model = MeanFieldNN(x_train.shape[1], hidden_size, y_train.shape[1], x_train.shape[0], - prev_means=mf_weights, prev_log_variances=mf_variances) - final_model.train(x_train, y_train, 0, no_epochs, bsize) - else: - final_model = model - - for i in range(len(x_testsets)): - if not single_head: - if len(x_coresets) > 0: - x_train, y_train = x_coresets[i], y_coresets[i] - bsize = x_train.shape[0] if (batch_size is None) else batch_size - final_model = MeanFieldNN(x_train.shape[1], hidden_size, y_train.shape[1], x_train.shape[0], - prev_means=mf_weights, prev_log_variances=mf_variances) - final_model.train(x_train, y_train, i, no_epochs, bsize) - else: - final_model = model - - head = 0 if single_head else i - x_test, y_test = x_testsets[i], y_testsets[i] - - pred = final_model.prediction_prob(x_test, head) - pred_mean = np.mean(pred, axis=0) - pred_y = np.argmax(pred_mean, axis=1) - y = np.argmax(y_test, axis=1) - cur_acc = len(np.where((pred_y - y) == 0)[0]) * 1.0 / y.shape[0] - acc.append(cur_acc) - - if len(x_coresets) > 0 and not single_head: - final_model.close_session() - - if len(x_coresets) > 0 and single_head: - final_model.close_session() - - return acc - - @staticmethod - def concatenate_results(score, all_score): - if all_score.size == 0: - all_score = np.reshape(score, (1, -1)) - else: - new_arr = np.empty((all_score.shape[0], all_score.shape[1]+1)) - new_arr[:] = np.nan - new_arr[:,:-1] = all_score - all_score = np.vstack((new_arr, score)) - return all_score - - if __name__ == "__main__": # Load data data = mx.test_utils.get_mnist() input_dim = np.prod(data['train_data'][0].shape) # Note the data will get flattened later - gen = SplitMnistGenerator(data, batch_size=None) - - nn_shape = (input_dim, 256, 256, 2) # binary classification - experiments = dict( - vanilla=dict(coreset_func=lambda: Random(coreset_size=0), - network_shape=nn_shape, num_epochs=120, single_head=False), - random=dict(coreset_func=lambda: Random(coreset_size=40), - network_shape=nn_shape, num_epochs=120, single_head=False), - k_center=dict(coreset_func=lambda: KCenter(coreset_size=40), - network_shape=nn_shape, num_epochs=120, single_head=False) + data_dtype = data['train_data'].dtype + label_dtype = data['train_label'].dtype + tasks = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)) + gen = SplitTaskGenerator(data, batch_size=None, tasks=tasks) + + network_shape = (input_dim, 256, 256, 2) # binary classification + num_epochs = 120 + learning_rate = 0.01 + optimizer = 'adam' + + experiment_parameters = ( + dict( + coreset=Vanilla(), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=False), + dict( + coreset=Random(coreset_size=40), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=False), + dict( + coreset=KCenter(coreset_size=40), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=False) ) + experiments = [] + # Run experiments - for name, params in experiments.items(): - print("Running experiment", name) + for params in experiment_parameters: + print("Running experiment", params['coreset'].__class__.__name__) set_seeds() - experiment = Experiment(batch_size=None, data_generator=gen, **params) + experiment = Experiment(batch_size=None, data_generator=gen, ctx=CTX, **params) experiment.run() print(experiment.overall_accuracy) + experiments.append(experiment) + + plot("split_mnist", experiments, tasks) diff --git a/mxfusion/inference/batch_loop.py b/mxfusion/inference/batch_loop.py index 1061365..0d08f8f 100644 --- a/mxfusion/inference/batch_loop.py +++ b/mxfusion/inference/batch_loop.py @@ -38,7 +38,7 @@ class BatchInferenceLoop(GradLoop): """ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', - learning_rate=1e-3, max_iter=1000, n_prints=10, verbose=False): + learning_rate=1e-3, max_iter=1000, n_prints=10, verbose=False, callback=None): """ :param infr_executor: The MXNet function that computes the training objective. :type infr_executor: MXNet Gluon Block @@ -54,6 +54,10 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', :type learning_rate: float :param max_iter: the maximum number of iterations of gradient optimization :type max_iter: int + :param n_prints: number of times to print status + :type n_prints: int + :param callback: Callback function for custom print statements + :type callback: func :param verbose: whether to print per-iteration messages. :type verbose: boolean """ @@ -71,5 +75,9 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', end='') if i % iter_step == 0 and i > 0: print() + if callback is not None: + callback(i + 1, loss.asscalar()) trainer.step(batch_size=1, ignore_stale_grad=True) loss = infr_executor(mx.nd.zeros(1, ctx=ctx), *data) + if callback is not None: + callback(max_iter + 1, loss.asscalar()) diff --git a/mxfusion/inference/grad_based_inference.py b/mxfusion/inference/grad_based_inference.py index f52495e..236effd 100644 --- a/mxfusion/inference/grad_based_inference.py +++ b/mxfusion/inference/grad_based_inference.py @@ -67,7 +67,7 @@ def create_executor(self): return infr def run(self, optimizer='adam', learning_rate=1e-3, max_iter=2000, - verbose=False, **kwargs): + verbose=False, callback=None, **kwargs): """ Run the inference method. @@ -79,8 +79,10 @@ def run(self, optimizer='adam', learning_rate=1e-3, max_iter=2000, :type max_iter: int :param verbose: whether to print per-iteration messages. :type verbose: boolean - :param **kwargs: The keyword arguments specify the data for inferences. The key of each argument is the name of the corresponding - variable in model definition and the value of the argument is the data in numpy array format. + :param callback: Callback function for custom print statements + :type callback: func + :param kwargs: The keyword arguments specify the data for inferences. The key of each argument is the name of + the corresponding variable in model definition and the value of the argument is the data in numpy array format. """ data = [kwargs[v] for v in self.observed_variable_names] self.initialize(**kwargs) @@ -89,4 +91,4 @@ def run(self, optimizer='adam', learning_rate=1e-3, max_iter=2000, return self._grad_loop.run( infr_executor=infr, data=data, param_dict=self.params.param_dict, ctx=self.mxnet_context, optimizer=optimizer, - learning_rate=learning_rate, max_iter=max_iter, verbose=verbose) + learning_rate=learning_rate, max_iter=max_iter, verbose=verbose, callback=callback) From 2df2d480d2594df2d5e44b1843a203eaa1e894b6 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Tue, 20 Nov 2018 16:48:01 +0000 Subject: [PATCH 05/32] Fixed some modelling bugs Improved output formatting Fixed a bug in the batch loop executor --- .../experiment.py | 7 +-- .../variational_continual_learning/models.py | 50 ++++--------------- .../variational_continual_learning.py | 16 +++--- mxfusion/inference/batch_loop.py | 2 +- 4 files changed, 26 insertions(+), 49 deletions(-) diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index bcd7a5b..4e5955f 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -107,7 +107,7 @@ def run(self, verbose=True): # Incorporate coreset data and make prediction acc = self.get_scores() - print(f"Accuracy for task {task_id}: {acc:.3f}") + print("Accuracies after task {}: [{}]".format(task_id, ", ".join(map("{:.3f}".format, acc)))) self.overall_accuracy = self.concatenate_results(acc, self.overall_accuracy) def get_scores(self): @@ -146,16 +146,17 @@ def get_scores(self): batch_size=self.batch_size, priors=self.bayesian_model.posteriors) else: - print("Using main model as prediction model") + print(f"Using main model as prediction model for task {task_id}") prediction_model = self.bayesian_model head = 0 if self.single_head else task_id + print(f"Generating predictions for task {task_id}") predictions = prediction_model.prediction_prob(test_iterator, head) predicted_means = np.mean(predictions, axis=0) predicted_labels = np.argmax(predicted_means, axis=1) test_labels = test_iterator.label[0][1].asnumpy() - cur_acc = len(np.where((predicted_labels - test_labels) == 0)[0]) * 1.0 / test_labels.shape[0] + cur_acc = len(np.where(np.abs(predicted_labels - test_labels) < 1e-10)[0]) * 1.0 / test_labels.shape[0] acc.append(cur_acc) return acc diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 1dda04d..5126d0f 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -50,7 +50,7 @@ def create_net(self): self.net = HybridSequential(prefix=self.prefix) with self.net.name_scope(): for i in range(1, len(self.network_shape) - 1): - self.net.add(Dense(self.network_shape[i], activation="relu")) # , in_units=self.network_shape[i - 1])) + self.net.add(Dense(self.network_shape[i], activation="relu", in_units=self.network_shape[i - 1])) #  Last layer for classification self.net.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) self.net.initialize(Xavier(magnitude=2.34), ctx=self.ctx) @@ -81,15 +81,12 @@ def prediction_prob(self, test_iter, task_idx): def get_weights(self): params = self.net.collect_params() - # weights = [params.get('dense{}_weight'.format(i)) for i in range(len(self.network_shape) - 1)] - # biases = [params.get('dense{}_bias'.format(i)) for i in range(len(self.network_shape) - 1)] - # return weights, biases return params @staticmethod - def print_status(epoch, loss, train_accuracy, test_accuracy): - print("Epoch {:4d}. Loss: {:8.2f}, Train_acc {:.3f}, Test_acc {:.3f}".format( - epoch, loss, train_accuracy, test_accuracy)) + def print_status(epoch, loss, train_accuracy=float("nan"), validation_accuracy=float("nan")): + print(f"Epoch {epoch:4d}. Loss: {loss:8.2f}, " + f"Train accuracy {train_accuracy:.3f}, Validation accuracy {validation_accuracy:.3f}") class VanillaNN(BaseNN): @@ -123,22 +120,17 @@ class BayesianNN(BaseNN): def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): super().__init__(network_shape, learning_rate, optimizer, max_iter, ctx) - # self.prior_means = dict() - # self.prior_variances = dict() self.create_model() def create_model(self): self.model = Model() self.model.N = Variable() self.model.f = MXFusionGluonFunction(self.net, num_outputs=1, broadcastable=False) - self.model.x = Variable(shape=(self.model.N, int(self.network_shape[0]))) + self.model.x = Variable(shape=(self.model.N, self.network_shape[0])) self.model.r = self.model.f(self.model.x) self.model.y = Categorical.define_variable(log_prob=self.model.r, shape=(self.model.N, 1), num_classes=2) for v in self.model.r.factor.parameters.values(): - # self.prior_means[v] = Variable(shape=v.shape) - # self.prior_variances[v] = Variable(shape=v.shape) - # v.set_prior(Normal(mean=self.prior_means[v], variance=self.prior_variances[v])) means = Variable(shape=v.shape) variances = Variable(shape=v.shape) setattr(self.model, v.inherited_name + "_mean", means) @@ -158,19 +150,7 @@ def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs self.net(data[:1]) # TODO: Would rather have done this before! - self.create_model() - - # Set the priors - # if priors is None: - # for v in self.model.r.factor.parameters.values(): - # v.set_prior(Normal(mean=mx.nd.array([0]), variance=mx.nd.array([3.]))) - # if isinstance(priors, ParameterDict): - # for v in self.model.r.factor.parameters.values(): - # short_name = v.inherited_name.partition(self.prefix)[-1] - # mean = priors.get(short_name).data() - # v.set_prior(Normal(mean=mean, variance=mx.nd.array([3.]))) - # else: - # pass + # self.create_model() observed = [self.model.x, self.model.y] q = create_Gaussian_meanfield(model=self.model, observed=observed) @@ -178,17 +158,7 @@ def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs self.inference = GradBasedInference(inference_algorithm=alg, grad_loop=BatchInferenceLoop()) self.inference.initialize(y=labels, x=data) - for v_name, v in self.model.r.factor.parameters.items(): - qv_mean = q[v.uuid].factor.mean - qv_variance = q[v.uuid].factor.variance - - # Initialization of mean/variances of NN weights - # TODO: Still needed? - a = self.inference.params[qv_variance].asnumpy() - a[:] = 1e-8 - self.inference.params[qv_mean] = self.net.collect_params()[v_name].data() - self.inference.params[qv_variance] = mx.nd.array(a) - + for v in self.model.r.factor.parameters.values(): v_name_mean = v.inherited_name + "_mean" v_name_variance = v.inherited_name + "_variance" @@ -207,6 +177,9 @@ def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs mean_prior = getattr(self.model, v_name_mean) variance_prior = getattr(self.model, v_name_variance) + + # v.set_prior(Normal(mean=mean_prior, variance=variance_prior)) + self.inference.params[mean_prior] = means self.inference.params[variance_prior] = variances @@ -214,9 +187,8 @@ def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs self.inference.params.param_dict[mean_prior]._grad_req = 'null' self.inference.params.param_dict[variance_prior]._grad_req = 'null' - callback = lambda epoch, loss: self.print_status(epoch, loss, float('nan'), float('nan')) self.inference.run(max_iter=self.max_iter, learning_rate=self.learning_rate, - x=data, y=labels, verbose=False, callback=callback) + x=data, y=labels, verbose=False, callback=self.print_status) @property def posteriors(self): diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 37e5045..24f3ab0 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -16,6 +16,7 @@ import mxnet as mx import matplotlib.pyplot as plt +from datetime import datetime from examples.variational_continual_learning.experiment import Experiment from examples.variational_continual_learning.mnist import SplitTaskGenerator @@ -48,25 +49,24 @@ def plot(title, experiments, tasks): ax.set_xlabel('# tasks') ax.legend() ax.set_title(title) - plt.show() - - filename = "vcl_{}.pdf".format(title) + filename = f"vcl_{title}_{datetime.now().isoformat()[:-7]}.pdf" fig.savefig(filename, bbox_inches='tight') + plt.show() plt.close() if __name__ == "__main__": # Load data data = mx.test_utils.get_mnist() - input_dim = np.prod(data['train_data'][0].shape) # Note the data will get flattened later + input_dim = int(np.prod(data['train_data'][0].shape)) # Note the data will get flattened later data_dtype = data['train_data'].dtype label_dtype = data['train_label'].dtype tasks = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)) gen = SplitTaskGenerator(data, batch_size=None, tasks=tasks) network_shape = (input_dim, 256, 256, 2) # binary classification - num_epochs = 120 + num_epochs = 120 # 120 learning_rate = 0.01 optimizer = 'adam' @@ -98,11 +98,15 @@ def plot(title, experiments, tasks): # Run experiments for params in experiment_parameters: + print("-" * 50) print("Running experiment", params['coreset'].__class__.__name__) + print("-" * 50) set_seeds() experiment = Experiment(batch_size=None, data_generator=gen, ctx=CTX, **params) experiment.run() print(experiment.overall_accuracy) experiments.append(experiment) + print("-" * 50) + print() - plot("split_mnist", experiments, tasks) + plot("Split MNIST", experiments, tasks) diff --git a/mxfusion/inference/batch_loop.py b/mxfusion/inference/batch_loop.py index 0d08f8f..4b209d2 100644 --- a/mxfusion/inference/batch_loop.py +++ b/mxfusion/inference/batch_loop.py @@ -78,6 +78,6 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', if callback is not None: callback(i + 1, loss.asscalar()) trainer.step(batch_size=1, ignore_stale_grad=True) - loss = infr_executor(mx.nd.zeros(1, ctx=ctx), *data) + loss, _ = infr_executor(mx.nd.zeros(1, ctx=ctx), *data) if callback is not None: callback(max_iter + 1, loss.asscalar()) From 3f3170fe9ec8c458337f92bc95be5c4a71828efe Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Tue, 20 Nov 2018 17:42:01 +0000 Subject: [PATCH 06/32] Fixed provide_data and provide_label in multiiterator Removed unneeded code in vanilla coreset Fixed a print statement --- .../coresets.py | 19 +++---------------- .../variational_continual_learning/models.py | 2 +- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/examples/variational_continual_learning/coresets.py b/examples/variational_continual_learning/coresets.py index a8cd681..0ea97c0 100644 --- a/examples/variational_continual_learning/coresets.py +++ b/examples/variational_continual_learning/coresets.py @@ -16,6 +16,7 @@ import mxnet as mx from mxnet.io import NDArrayIter, DataIter, DataBatch from abc import ABCMeta, abstractmethod +import itertools class MultiIter(DataIter): @@ -47,11 +48,11 @@ def reset(self): @property def provide_data(self): - return [b for b in self.iterators[0].provide_data] + [b for b in self.iterators[1].provide_data] + return list(itertools.chain(map(lambda i: i.provide_data, self.iterators))) @property def provide_label(self): - return [b for b in self.iterators[0].provide_label] + [b for b in self.iterators[1].provide_label] + return list(itertools.chain(map(lambda i: i.provide_label, self.iterators))) def append(self, iterator): if not isinstance(iterator, (DataIter, NDArrayIter)): @@ -70,20 +71,6 @@ def __init__(self): self.iterator = None self.reset() - @staticmethod - def get_merged(coresets): - """ - Get merged data and labels from the list of coresets - :param coresets: list of coresets - :type coresets: list(Coreset) - :return: merged data and labels - """ - merged_data, merged_labels = coresets[0].data, coresets[0].labels - for i in range(1, len(coresets)): - merged_data = np.vstack((merged_data, coresets[i].data)) - merged_labels = np.vstack((merged_labels, coresets[i].labels)) - return merged_data, merged_labels - @abstractmethod def selector(self, data): pass diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 5126d0f..a684f5f 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -112,7 +112,7 @@ def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs validation_iterator.reset() train_accuracy = self.evaluate_accuracy(train_iterator) validation_accuracy = self.evaluate_accuracy(validation_iterator) - self.print_status(epoch, cumulative_loss / num_examples, train_accuracy, validation_accuracy) + self.print_status(epoch + 1, cumulative_loss / num_examples, train_accuracy, validation_accuracy) class BayesianNN(BaseNN): From 52267c39945e063a32ee6d9cac5973e59af8c42d Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Wed, 21 Nov 2018 10:50:58 +0000 Subject: [PATCH 07/32] Added Permuted MNIST generator --- .../experiment.py | 4 +- .../variational_continual_learning/mnist.py | 75 ++++++++++--------- .../variational_continual_learning.py | 70 +++++++++++------ 3 files changed, 86 insertions(+), 63 deletions(-) diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index 4e5955f..b7b7a86 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -78,8 +78,7 @@ def run(self, verbose=True): # Train network with maximum likelihood to initialize first model if task_id == 0: - # TODO: test_iterator should be val_iter - print("Training vanilla neural network as starting point") + print("Training non-Bayesian neural network as starting point") self.vanilla_model.train( train_iterator=train_iterator, validation_iterator=test_iterator, @@ -92,7 +91,6 @@ def run(self, verbose=True): train_iterator.reset() # Train on non-coreset data - # TODO: test_iterator should be val_iter print("Training main model") self.bayesian_model.train( train_iterator=train_iterator, diff --git a/examples/variational_continual_learning/mnist.py b/examples/variational_continual_learning/mnist.py index 247e690..c0767fb 100644 --- a/examples/variational_continual_learning/mnist.py +++ b/examples/variational_continual_learning/mnist.py @@ -12,71 +12,74 @@ # permissions and limitations under the License. # ============================================================================== -import mxnet as mx import numpy as np from mxnet.io import NDArrayIter -class SplitTaskGenerator: +class TaskGenerator: def __init__(self, data, batch_size, tasks): self.data = data self.batch_size = batch_size self.tasks = tasks + +class SplitTaskGenerator(TaskGenerator): def __iter__(self): + """ + Iterate over tasks + :return: the next task + :rtype: NDArrayIter + """ for task in self.tasks: idx_train_0 = np.where(self.data['train_label'] == task[0])[0] idx_train_1 = np.where(self.data['train_label'] == task[1])[0] idx_test_0 = np.where(self.data['test_label'] == task[0])[0] idx_test_1 = np.where(self.data['test_label'] == task[1])[0] - # TODO: Validation data x_train = np.vstack((self.data['train_data'][idx_train_0], self.data['train_data'][idx_train_1])) y_train = np.hstack((np.ones((idx_train_0.shape[0],)), np.zeros((idx_train_1.shape[0],)))) x_test = np.vstack((self.data['test_data'][idx_test_0], self.data['test_data'][idx_test_1])) y_test = np.hstack((np.ones((idx_test_0.shape[0],)), np.zeros((idx_test_1.shape[0],)))) - batch_size = x_train.shape[0] if self.batch_size is None else self.batch_size + batch_size = self.batch_size or x_train.shape[0] train_iter = NDArrayIter(x_train, y_train, batch_size, shuffle=True) - batch_size = x_test.shape[0] if self.batch_size is None else self.batch_size + batch_size = self.batch_size or x_test.shape[0] test_iter = NDArrayIter(x_test, y_test, batch_size) yield train_iter, test_iter return -# class SplitMnistGenerator: -# def __init__(self, data, batch_size): -# self.data = data -# self.batch_size = batch_size -# self.num_tasks = 5 -# -# def __iter__(self): -# for i in range(self.num_tasks): -# idx_train_0 = np.where(self.data['train_label'] == i * 2)[0] -# idx_train_1 = np.where(self.data['train_label'] == i * 2 + 1)[0] -# idx_test_0 = np.where(self.data['test_label'] == i * 2)[0] -# idx_test_1 = np.where(self.data['test_label'] == i * 2 + 1)[0] -# -# # TODO: Validation data -# x_train = np.vstack((self.data['train_data'][idx_train_0], self.data['train_data'][idx_train_1])) -# y_train = np.hstack((np.ones((idx_train_0.shape[0],)), np.zeros((idx_train_1.shape[0],)))) -# -# x_test = np.vstack((self.data['test_data'][idx_test_0], self.data['test_data'][idx_test_1])) -# y_test = np.hstack((np.ones((idx_test_0.shape[0],)), np.zeros((idx_test_1.shape[0],)))) -# -# batch_size = x_train.shape[0] if self.batch_size is None else self.batch_size -# train_iter = NDArrayIter(x_train, y_train, batch_size, shuffle=True) -# -# batch_size = x_test.shape[0] if self.batch_size is None else self.batch_size -# test_iter = NDArrayIter(x_test, y_test, batch_size) -# -# yield train_iter, test_iter -# return +class PermutedTaskGenerator(TaskGenerator): + def __iter__(self): + """ + Iterate over tasks + :return: the next task + :rtype: NDArrayIter + """ + for _ in self.tasks: + x_train = self.data['train_data'] + y_train = self.data['train_label'] + + x_test = self.data['test_data'] + y_test = self.data['test_label'] + permutation = np.random.permutation(x_train.shape[1]) -class SplittableIterator(NDArrayIter): - def __init__(self, data): - super().__init__(data) + x_train = x_train[:, permutation] + x_test = x_test[:, permutation] + + # Convert to one hot encodings + # y_train = np.eye(10)[y_train] + # y_test = np.eye(10)[y_test] + + batch_size = self.batch_size or x_train.shape[0] + train_iter = NDArrayIter(x_train, y_train, batch_size, shuffle=True) + + batch_size = self.batch_size or x_test.shape[0] + test_iter = NDArrayIter(x_test, y_test, batch_size) + + yield train_iter, test_iter + return diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 24f3ab0..8cd3b9a 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -19,7 +19,7 @@ from datetime import datetime from examples.variational_continual_learning.experiment import Experiment -from examples.variational_continual_learning.mnist import SplitTaskGenerator +from examples.variational_continual_learning.mnist import SplitTaskGenerator, PermutedTaskGenerator from examples.variational_continual_learning.coresets import Random, KCenter, Vanilla import logging @@ -34,8 +34,8 @@ def set_seeds(seed=42): np.random.seed(seed) -def plot(title, experiments, tasks): - fig = plt.figure(figsize=(len(tasks), 3)) +def plot(title, experiments, num_tasks): + fig = plt.figure(figsize=(num_tasks, 3)) ax = plt.gca() x = range(1, len(tasks) + 1) @@ -60,13 +60,32 @@ def plot(title, experiments, tasks): # Load data data = mx.test_utils.get_mnist() input_dim = int(np.prod(data['train_data'][0].shape)) # Note the data will get flattened later + + # noinspection PyUnreachableCode + if True: + title = "Split MNIST" + tasks = ((0, 1), (2, 3)) # , (4, 5), (6, 7), (8, 9)) + num_epochs = 1 # 120 + batch_size = None + gen = SplitTaskGenerator + label_shape = 2 + network_shape = (input_dim, 256, 256, (label_shape, )) + single_head = False + coreset_size = 40 + else: + title = "Permuted MNIST" + tasks = range(2) # range(10) + num_epochs = 1 # 100 + batch_size = 256 + gen = PermutedTaskGenerator + label_shape = 10 + network_shape = (input_dim, 100, 100, label_shape) + single_head = True + coreset_size = 200 + data_dtype = data['train_data'].dtype label_dtype = data['train_label'].dtype - tasks = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)) - gen = SplitTaskGenerator(data, batch_size=None, tasks=tasks) - network_shape = (input_dim, 256, 256, 2) # binary classification - num_epochs = 120 # 120 learning_rate = 0.01 optimizer = 'adam' @@ -77,21 +96,21 @@ def plot(title, experiments, tasks): optimizer=optimizer, network_shape=network_shape, num_epochs=num_epochs, - single_head=False), - dict( - coreset=Random(coreset_size=40), - learning_rate=learning_rate, - optimizer=optimizer, - network_shape=network_shape, - num_epochs=num_epochs, - single_head=False), - dict( - coreset=KCenter(coreset_size=40), - learning_rate=learning_rate, - optimizer=optimizer, - network_shape=network_shape, - num_epochs=num_epochs, - single_head=False) + single_head=single_head), + # dict( + # coreset=Random(coreset_size=coreset_size), + # learning_rate=learning_rate, + # optimizer=optimizer, + # network_shape=network_shape, + # num_epochs=num_epochs, + # single_head=single_head), + # dict( + # coreset=KCenter(coreset_size=coreset_size), + # learning_rate=learning_rate, + # optimizer=optimizer, + # network_shape=network_shape, + # num_epochs=num_epochs, + # single_head=single_head) ) experiments = [] @@ -102,11 +121,14 @@ def plot(title, experiments, tasks): print("Running experiment", params['coreset'].__class__.__name__) print("-" * 50) set_seeds() - experiment = Experiment(batch_size=None, data_generator=gen, ctx=CTX, **params) + experiment = Experiment(batch_size=batch_size, + data_generator=gen(data, batch_size=batch_size, tasks=tasks), + ctx=CTX, + **params) experiment.run() print(experiment.overall_accuracy) experiments.append(experiment) print("-" * 50) print() - plot("Split MNIST", experiments, tasks) + plot(title, experiments, len(tasks)) From 672a5b233928d4128d881d38453afa6ccb47a945 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Wed, 21 Nov 2018 18:09:24 +0000 Subject: [PATCH 08/32] Created Task class Updated model class to support multi-headed models Updated experiment class to reflect changes --- .../experiment.py | 62 +++++---- .../variational_continual_learning/mnist.py | 17 ++- .../variational_continual_learning/models.py | 130 ++++++++++++++---- 3 files changed, 153 insertions(+), 56 deletions(-) diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index b7b7a86..5fe16e7 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -22,6 +22,7 @@ class Experiment: def __init__(self, network_shape, num_epochs, learning_rate, optimizer, data_generator, coreset, batch_size, single_head, ctx): self.network_shape = network_shape + self.original_network_shape = network_shape # Only used when resetting self.num_epochs = num_epochs self.learning_rate = learning_rate self.optimizer = optimizer @@ -38,14 +39,9 @@ def __init__(self, network_shape, num_epochs, learning_rate, optimizer, data_gen self.bayesian_model = None self.prediction_model = None - self.reset() - - def reset(self): - self.coreset.reset() - self.overall_accuracy = np.array([]) - self.test_iterators = dict() - - model_params = dict( + @property + def model_params(self): + return dict( network_shape=self.network_shape, learning_rate=self.learning_rate, optimizer=self.optimizer, @@ -53,9 +49,18 @@ def reset(self): ctx=self.context ) - self.vanilla_model = VanillaNN(**model_params) - self.bayesian_model = BayesianNN(**model_params) - self.prediction_model = BayesianNN(**model_params) + def reset(self): + self.coreset.reset() + self.network_shape = self.original_network_shape + self.overall_accuracy = np.array([]) + self.test_iterators = dict() + + print("Creating Vanilla Model") + self.vanilla_model = VanillaNN(**self.model_params) + print("Creating Bayesian Model") + self.bayesian_model = BayesianNN(**self.model_params) + print("Creating Prediction Model") + self.prediction_model = BayesianNN(**self.model_params) def run(self, verbose=True): self.reset() @@ -64,38 +69,45 @@ def run(self, verbose=True): # We will in fact use the results of maximum likelihood as the first prior priors = None - for task_id, (train_iterator, test_iterator) in enumerate(self.data_generator): - print("Task: ", task_id) - self.test_iterators[task_id] = test_iterator + for task in self.data_generator: + print("Task: ", task.task_id) + self.test_iterators[task.task_id] = task.test_iterator # Set the readout head to train_iterator - head = 0 if self.single_head else task_id + head = 0 if self.single_head else task.task_id # Update the coreset, and update the train iterator to remove the coreset data - train_iterator = self.coreset.update(train_iterator) + train_iterator = self.coreset.update(task.train_iterator) - batch_size = train_iterator.provide_label[0].shape[0] if self.batch_size is None else self.batch_size + label_shape = train_iterator.provide_label[0].shape + batch_size = label_shape[0] if self.batch_size is None else self.batch_size # Train network with maximum likelihood to initialize first model - if task_id == 0: + if task.task_id == 0: print("Training non-Bayesian neural network as starting point") self.vanilla_model.train( train_iterator=train_iterator, - validation_iterator=test_iterator, - task_id=task_id, + validation_iterator=task.test_iterator, + head=task.task_id, epochs=5, batch_size=batch_size, verbose=verbose) priors = self.vanilla_model.net.collect_params() train_iterator.reset() + else: + if not self.single_head: + self.network_shape = self.network_shape[0:-1] + \ + (self.network_shape[-1] + (task.number_of_classes,),) + # TODO: Would be nice if we could use the same object here + self.bayesian_model = BayesianNN(**self.model_params) # Train on non-coreset data print("Training main model") self.bayesian_model.train( train_iterator=train_iterator, - validation_iterator=test_iterator, - task_id=head, + validation_iterator=task.test_iterator, + head=head, epochs=self.num_epochs, batch_size=self.batch_size, priors=priors) @@ -105,7 +117,7 @@ def run(self, verbose=True): # Incorporate coreset data and make prediction acc = self.get_scores() - print("Accuracies after task {}: [{}]".format(task_id, ", ".join(map("{:.3f}".format, acc)))) + print("Accuracies after task {}: [{}]".format(task.task_id, ", ".join(map("{:.3f}".format, acc)))) self.overall_accuracy = self.concatenate_results(acc, self.overall_accuracy) def get_scores(self): @@ -121,7 +133,7 @@ def get_scores(self): prediction_model.train( train_iterator=train_iterator, validation_iterator=None, - task_id=0, + head=0, epochs=self.num_epochs, batch_size=batch_size, priors=priors) @@ -139,7 +151,7 @@ def get_scores(self): prediction_model.train( train_iterator=self.coreset.iterator, validation_iterator=None, - task_id=task_id, + head=task_id, epochs=self.num_epochs, batch_size=self.batch_size, priors=self.bayesian_model.posteriors) diff --git a/examples/variational_continual_learning/mnist.py b/examples/variational_continual_learning/mnist.py index c0767fb..ba17930 100644 --- a/examples/variational_continual_learning/mnist.py +++ b/examples/variational_continual_learning/mnist.py @@ -16,6 +16,15 @@ from mxnet.io import NDArrayIter +class Task: + def __init__(self, task_id, task_details, train_iterator, test_iterator, number_of_classes): + self.task_id = task_id + self.task_details = task_details + self.train_iterator = train_iterator + self.test_iterator = test_iterator + self.number_of_classes = number_of_classes + + class TaskGenerator: def __init__(self, data, batch_size, tasks): self.data = data @@ -30,7 +39,7 @@ def __iter__(self): :return: the next task :rtype: NDArrayIter """ - for task in self.tasks: + for i, task in enumerate(self.tasks): idx_train_0 = np.where(self.data['train_label'] == task[0])[0] idx_train_1 = np.where(self.data['train_label'] == task[1])[0] idx_test_0 = np.where(self.data['test_label'] == task[0])[0] @@ -48,7 +57,7 @@ def __iter__(self): batch_size = self.batch_size or x_test.shape[0] test_iter = NDArrayIter(x_test, y_test, batch_size) - yield train_iter, test_iter + yield Task(i, task, train_iter, test_iter, number_of_classes=2) return @@ -59,7 +68,7 @@ def __iter__(self): :return: the next task :rtype: NDArrayIter """ - for _ in self.tasks: + for i, task in enumerate(self.tasks): x_train = self.data['train_data'] y_train = self.data['train_label'] @@ -81,5 +90,5 @@ def __iter__(self): batch_size = self.batch_size or x_test.shape[0] test_iter = NDArrayIter(x_test, y_test, batch_size) - yield train_iter, test_iter + yield Task(i, task, train_iter, test_iter, number_of_classes=y_train.shape[1]) return diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index a684f5f..0f1db94 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -23,7 +23,6 @@ from mxfusion.components.distributions import Normal, Categorical from mxfusion.inference import BatchInferenceLoop, create_Gaussian_meanfield, GradBasedInference, \ StochasticVariationalInference, VariationalPosteriorForwardSampling -import numpy as np from abc import ABC, abstractmethod @@ -31,7 +30,6 @@ class BaseNN(ABC): prefix = None def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): - self.task_idx = mx.nd.array([-1], dtype=np.float32) self.model = None self.network_shape = network_shape self.learning_rate = learning_rate @@ -45,14 +43,32 @@ def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): self.create_net() self.loss = SoftmaxCrossEntropyLoss() + @property + def single_head(self): + if isinstance(self.network_shape[-1], int): + return True + if isinstance(self.network_shape[-1], (tuple, list)): + return False + raise ValueError("Unsupported network shape") + + @property + def num_heads(self): + return 1 if self.single_head else len(self.network_shape[-1]) + def create_net(self): # Create net self.net = HybridSequential(prefix=self.prefix) with self.net.name_scope(): for i in range(1, len(self.network_shape) - 1): self.net.add(Dense(self.network_shape[i], activation="relu", in_units=self.network_shape[i - 1])) - #  Last layer for classification - self.net.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) + + # Last layer for classification - one per head for multi-head networks + if self.single_head: + self.net.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) + else: + for label_shape in self.network_shape[-1]: + self.net.add(Dense(label_shape, in_units=self.network_shape[-2])) + self.net.initialize(Xavier(magnitude=2.34), ctx=self.ctx) def forward(self, data): @@ -62,6 +78,12 @@ def forward(self, data): return output def evaluate_accuracy(self, data_iterator): + """ + Evaluate the accuracy of the model on the given data iterator + :param data_iterator: data iterator + :return: accuracy + :rtype: float + """ acc = Accuracy() for i, batch in enumerate(data_iterator): output = self.forward(batch.data[0]) @@ -71,7 +93,7 @@ def evaluate_accuracy(self, data_iterator): return acc.get()[1] @abstractmethod - def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs, priors=None, verbose=True): + def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None, verbose=True): raise NotImplementedError def prediction_prob(self, test_iter, task_idx): @@ -92,7 +114,7 @@ def print_status(epoch, loss, train_accuracy=float("nan"), validation_accuracy=f class VanillaNN(BaseNN): prefix = 'vanilla_' - def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs, priors=None, verbose=True): + def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None, verbose=True): trainer = Trainer(self.net.collect_params(), self.optimizer, dict(learning_rate=self.learning_rate)) num_examples = 0 @@ -123,21 +145,50 @@ def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): self.create_model() def create_model(self): - self.model = Model() + self.model = Model(verbose=True) self.model.N = Variable() - self.model.f = MXFusionGluonFunction(self.net, num_outputs=1, broadcastable=False) + self.model.f = MXFusionGluonFunction(self.net, num_outputs=self.num_heads, broadcastable=False) self.model.x = Variable(shape=(self.model.N, self.network_shape[0])) - self.model.r = self.model.f(self.model.x) - self.model.y = Categorical.define_variable(log_prob=self.model.r, shape=(self.model.N, 1), num_classes=2) - for v in self.model.r.factor.parameters.values(): + if self.single_head: + self.model.r = self.model.f(self.model.x) + self.model.y = Categorical.define_variable( + log_prob=self.model.r, shape=(self.model.N, 1), num_classes=self.network_shape[-1]) + self.create_prior_variables(self.model.r) + else: + for head, label_shape in enumerate(self.network_shape[-1]): + r = self.model.f(self.model.x)[head] if self.num_heads > 1 else self.model.f(self.model.x) + setattr(self.model, f'r{head}', r) + y = Categorical.define_variable(log_prob=r, shape=(self.model.N, 1), num_classes=label_shape) + setattr(self.model, f'y{head}', y) + # TODO the statement below could probably be done only for the first head, since they all share the same + # factor parameters + self.create_prior_variables(r) + + def create_prior_variables(self, r): + for v in r.factor.parameters.values(): + # First check that the variables haven't already been created (in multi-head case) + if getattr(self.model, v.inherited_name + "_mean", None) is not None: + continue + if getattr(self.model, v.inherited_name + "_variance", None) is not None: + continue + means = Variable(shape=v.shape) variances = Variable(shape=v.shape) setattr(self.model, v.inherited_name + "_mean", means) setattr(self.model, v.inherited_name + "_variance", variances) v.set_prior(Normal(mean=means, variance=variances)) - def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs, priors=None, verbose=True): + # noinspection PyUnresolvedReferences + def get_net_parameters(self, head): + if self.single_head: + r = self.model.r + else: + r = getattr(self.model, f'r{head}') + return r.factor.parameters + + # noinspection PyUnresolvedReferences + def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None, verbose=True): for i, batch in enumerate(train_iterator): if i > 0: raise NotImplementedError("Currently not supported for more than one batch of data. " @@ -147,29 +198,41 @@ def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs labels = mx.nd.expand_dims(batch.label[0], axis=-1).as_in_context(self.ctx) # pass some data to initialise the net - self.net(data[:1]) + # self.net(data[:1]) # TODO: Would rather have done this before! # self.create_model() - observed = [self.model.x, self.model.y] + if self.single_head: + observed = [self.model.x, self.model.y] + kwargs = dict(y=labels, x=data) + else: + observed = [self.model.x, getattr(self.model, f"y{head}")] + kwargs = {'x': data, f'y{head}': labels} + q = create_Gaussian_meanfield(model=self.model, observed=observed) alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed) self.inference = GradBasedInference(inference_algorithm=alg, grad_loop=BatchInferenceLoop()) - self.inference.initialize(y=labels, x=data) + self.inference.initialize(**kwargs) + + def prior_mean(shape): + return mx.nd.zeros(shape=shape) - for v in self.model.r.factor.parameters.values(): + def prior_variance(shape): + return mx.nd.ones(shape=shape) * 3 + + for v in self.get_net_parameters(head).values(): v_name_mean = v.inherited_name + "_mean" v_name_variance = v.inherited_name + "_variance" - if priors is None: - means = mx.nd.zeros(shape=v.shape) - variances = mx.nd.ones(shape=v.shape) * 3 + if priors is None or (v_name_mean not in priors and v_name_variance not in priors): + means = prior_mean(shape=v.shape) + variances = prior_variance(shape=v.shape) elif isinstance(priors, ParameterDict): # This is a maximum likelihood estimate short_name = v.inherited_name.partition(self.prefix)[-1] means = priors.get(short_name).data() - variances = mx.nd.ones(shape=v.shape) * 3 + variances = prior_variance(shape=v.shape) else: # Use posteriors from previous round of inference means = priors[v_name_mean] @@ -188,18 +251,27 @@ def train(self, train_iterator, validation_iterator, task_id, batch_size, epochs self.inference.params.param_dict[variance_prior]._grad_req = 'null' self.inference.run(max_iter=self.max_iter, learning_rate=self.learning_rate, - x=data, y=labels, verbose=False, callback=self.print_status) + verbose=False, callback=self.print_status, **kwargs) + # noinspection PyUnresolvedReferences @property def posteriors(self): q = self.inference.inference_algorithm.posterior posteriors = dict() - for v_name, v in self.model.r.factor.parameters.items(): - posteriors[v.inherited_name + "_mean"] = self.inference.params[q[v.uuid].factor.mean].asnumpy() - posteriors[v.inherited_name + "_variance"] = self.inference.params[q[v.uuid].factor.variance].asnumpy() + if self.single_head: + for v_name, v in self.model.r.factor.parameters.items(): + posteriors[v.inherited_name + "_mean"] = self.inference.params[q[v.uuid].factor.mean].asnumpy() + posteriors[v.inherited_name + "_variance"] = self.inference.params[q[v.uuid].factor.variance].asnumpy() + else: + for head in range(self.num_heads): + for v in self.get_net_parameters(head).values(): + posteriors[v.inherited_name + "_mean"] = self.inference.params[q[v.uuid].factor.mean].asnumpy() + posteriors[v.inherited_name + "_variance"] = \ + self.inference.params[q[v.uuid].factor.variance].asnumpy() return posteriors - def prediction_prob(self, test_iter, task_idx): + # noinspection PyUnresolvedReferences + def prediction_prob(self, test_iter, head): if self.inference is None: raise RuntimeError("Model not yet learnt") @@ -209,9 +281,13 @@ def prediction_prob(self, test_iter, task_idx): "Please switch to using the MinibatchInferenceLoop") data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) - N, D = map(lambda x: mx.nd.array([x], ctx=self.ctx), data.shape) + + if self.single_head: + r = self.model.r + else: + r = getattr(self.model, f'r{head}') prediction_inference = VariationalPosteriorForwardSampling( - 10, [self.model.x], self.inference, [self.model.r]) + 10, [self.model.x], self.inference, [r]) res = prediction_inference.run(x=mx.nd.array(data)) return res[0].asnumpy() From 46b22235fd3beaaff8f54ed4b2253577fcd35e92 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Thu, 22 Nov 2018 10:06:26 +0000 Subject: [PATCH 09/32] Attempt to fix Sequential model for multi-head --- .../variational_continual_learning/models.py | 88 ++++++++++++++----- 1 file changed, 66 insertions(+), 22 deletions(-) diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 0f1db94..e0ce552 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -12,9 +12,10 @@ # permissions and limitations under the License. # ============================================================================== import mxnet as mx -from mxnet.gluon import Trainer, ParameterDict +from mxnet.gluon import Trainer, ParameterDict, Block +from mxnet.gluon.contrib.nn import Concurrent from mxnet.gluon.loss import SoftmaxCrossEntropyLoss -from mxnet.gluon.nn import HybridSequential, Dense +from mxnet.gluon.nn import HybridSequential, Dense, Sequential from mxnet.initializer import Xavier from mxnet.metric import Accuracy @@ -55,22 +56,63 @@ def single_head(self): def num_heads(self): return 1 if self.single_head else len(self.network_shape[-1]) - def create_net(self): - # Create net - self.net = HybridSequential(prefix=self.prefix) - with self.net.name_scope(): - for i in range(1, len(self.network_shape) - 1): - self.net.add(Dense(self.network_shape[i], activation="relu", in_units=self.network_shape[i - 1])) + class MLP(Block): + def __init__(self, prefix, network_shape, single_head, **kwargs): + super().__init__(prefix=prefix, **kwargs) + # self.hidden_layers = [] + self.single_head = single_head + + with self.name_scope(): + self.hidden = Sequential() + for i in range(1, len(network_shape) - 1): + self.hidden.add(Dense(network_shape[i], activation="relu", in_units=network_shape[i - 1])) + + # for i in range(1, len(network_shape) - 1): + # self.hidden_layers.append( + # Dense(network_shape[i], activation="relu", in_units=network_shape[i - 1])) + + self.dense1 = Dense(64, activation="relu") + + if single_head: + self.head = Dense(network_shape[-1], in_units=network_shape[-2]) + else: + self.concurrent = Concurrent() + # self.heads = [] + for label_shape in network_shape[-1]: + self.concurrent.add(Dense(label_shape, in_units=network_shape[-2])) + # self.heads.append(Dense(label_shape, in_units=network_shape[-2])) + + def forward(self, x): + for i in range(len(self.hidden)): + x = self.hidden[i](x) + + # for layer in self.hidden_layers: + # x = layer(x) - # Last layer for classification - one per head for multi-head networks if self.single_head: - self.net.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) + return self.head(x) else: - for label_shape in self.network_shape[-1]: - self.net.add(Dense(label_shape, in_units=self.network_shape[-2])) + return tuple(map(lambda h: h(x), self.concurrent)) + # return tuple(map(lambda h: h(x), self.heads)) + def create_net(self): + # Create net + self.net = self.MLP(self.prefix, self.network_shape, self.single_head) self.net.initialize(Xavier(magnitude=2.34), ctx=self.ctx) + # self.net = HybridSequential(prefix=self.prefix) + # with self.net.name_scope(): + # for i in range(1, len(self.network_shape) - 1): + # self.net.add(Dense(self.network_shape[i], activation="relu", in_units=self.network_shape[i - 1])) + # + # # Last layer for classification - one per head for multi-head networks + # if self.single_head: + # self.net.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) + # else: + # for label_shape in self.network_shape[-1]: + # self.net.add(Dense(label_shape, in_units=self.network_shape[-2])) + # self.net.initialize(Xavier(magnitude=2.34), ctx=self.ctx) + def forward(self, data): # Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height) data = mx.nd.flatten(data).as_in_context(self.ctx) @@ -122,7 +164,7 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p cumulative_loss = 0 for i, batch in enumerate(train_iterator): with mx.autograd.record(): - output = self.forward(batch.data[0]) + output = self.forward(batch.data[0].as_in_context(self.ctx))[head] labels = batch.label[0].as_in_context(self.ctx) loss = self.loss(output, labels) loss.backward() @@ -215,24 +257,18 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p self.inference = GradBasedInference(inference_algorithm=alg, grad_loop=BatchInferenceLoop()) self.inference.initialize(**kwargs) - def prior_mean(shape): - return mx.nd.zeros(shape=shape) - - def prior_variance(shape): - return mx.nd.ones(shape=shape) * 3 - for v in self.get_net_parameters(head).values(): v_name_mean = v.inherited_name + "_mean" v_name_variance = v.inherited_name + "_variance" if priors is None or (v_name_mean not in priors and v_name_variance not in priors): - means = prior_mean(shape=v.shape) - variances = prior_variance(shape=v.shape) + means = self.prior_mean(shape=v.shape) + variances = self.prior_variance(shape=v.shape) elif isinstance(priors, ParameterDict): # This is a maximum likelihood estimate short_name = v.inherited_name.partition(self.prefix)[-1] means = priors.get(short_name).data() - variances = prior_variance(shape=v.shape) + variances = self.prior_variance(shape=v.shape) else: # Use posteriors from previous round of inference means = priors[v_name_mean] @@ -291,3 +327,11 @@ def prediction_prob(self, test_iter, head): 10, [self.model.x], self.inference, [r]) res = prediction_inference.run(x=mx.nd.array(data)) return res[0].asnumpy() + + @staticmethod + def prior_mean(shape): + return mx.nd.zeros(shape=shape) + + @staticmethod + def prior_variance(shape): + return mx.nd.ones(shape=shape) * 3 From 580b44749732d9de6adceb3e2b8c222da68552ee Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Fri, 23 Nov 2018 11:08:07 +0000 Subject: [PATCH 10/32] Separated out the MLP model into a separate class/module Tidying up of experiment runner --- .../coresets.py | 3 + .../experiment.py | 153 +++++++++++------- .../variational_continual_learning/mlp.py | 64 ++++++++ .../variational_continual_learning/models.py | 87 +++------- .../variational_continual_learning.py | 16 +- 5 files changed, 188 insertions(+), 135 deletions(-) create mode 100644 examples/variational_continual_learning/mlp.py diff --git a/examples/variational_continual_learning/coresets.py b/examples/variational_continual_learning/coresets.py index 0ea97c0..a514970 100644 --- a/examples/variational_continual_learning/coresets.py +++ b/examples/variational_continual_learning/coresets.py @@ -42,6 +42,9 @@ def __next__(self): def __len__(self): return len(self.iterators) + def __getitem__(self, item): + return self.iterators[item] + def reset(self): for i in self.iterators: i.reset() diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index 5fe16e7..6853a5e 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -37,7 +37,9 @@ def __init__(self, network_shape, num_epochs, learning_rate, optimizer, data_gen self.test_iterators = None self.vanilla_model = None self.bayesian_model = None - self.prediction_model = None + + # self.prediction_models = dict() + self.task_ids = [] @property def model_params(self): @@ -54,13 +56,33 @@ def reset(self): self.network_shape = self.original_network_shape self.overall_accuracy = np.array([]) self.test_iterators = dict() + self.task_ids = [] print("Creating Vanilla Model") self.vanilla_model = VanillaNN(**self.model_params) - print("Creating Bayesian Model") + + # print("Creating Bayesian Model") + # self.bayesian_model = BayesianNN(**self.model_params) + + # if self.single_head: + # print("Creating Prediction Model") + # self.prediction_models[0] = BayesianNN(**self.model_params) + + def new_task(self, task): + if self.single_head: + return + + if len(self.task_ids) > 0: + self.network_shape = self.network_shape[0:-1] + (self.network_shape[-1] + (task.number_of_classes,),) + + self.task_ids.append(task.task_id) + + # TODO: Would be nice if we could use the same object here self.bayesian_model = BayesianNN(**self.model_params) - print("Creating Prediction Model") - self.prediction_model = BayesianNN(**self.model_params) + + # if len(self.coreset.iterator) > 0: + # # We'll keep the prediction model for each task since they'll get reused + # self.prediction_models[task.task_id] = BayesianNN(**self.model_params) def run(self, verbose=True): self.reset() @@ -83,24 +105,20 @@ def run(self, verbose=True): batch_size = label_shape[0] if self.batch_size is None else self.batch_size # Train network with maximum likelihood to initialize first model - if task.task_id == 0: + if len(self.task_ids) == 0: print("Training non-Bayesian neural network as starting point") self.vanilla_model.train( train_iterator=train_iterator, validation_iterator=task.test_iterator, - head=task.task_id, + head=head, epochs=5, batch_size=batch_size, verbose=verbose) priors = self.vanilla_model.net.collect_params() train_iterator.reset() - else: - if not self.single_head: - self.network_shape = self.network_shape[0:-1] + \ - (self.network_shape[-1] + (task.number_of_classes,),) - # TODO: Would be nice if we could use the same object here - self.bayesian_model = BayesianNN(**self.model_params) + + self.new_task(task) # Train on non-coreset data print("Training main model") @@ -110,7 +128,8 @@ def run(self, verbose=True): head=head, epochs=self.num_epochs, batch_size=self.batch_size, - priors=priors) + priors=priors, + verbose=verbose) # Set the priors for the next round of inference to be the current posteriors priors = self.bayesian_model.posteriors @@ -118,65 +137,77 @@ def run(self, verbose=True): # Incorporate coreset data and make prediction acc = self.get_scores() print("Accuracies after task {}: [{}]".format(task.task_id, ", ".join(map("{:.3f}".format, acc)))) - self.overall_accuracy = self.concatenate_results(acc, self.overall_accuracy) - - def get_scores(self): - acc = [] - prediction_model = self.prediction_model - + self.overall_accuracy = concatenate_results(acc, self.overall_accuracy) + + def get_coreset(self, task_id): + """ + For multi-headed models gets the coreset for the given task id. + For single-headed models this will return a merged coreset + :param task_id: The task id + :return: iterator for the coreset + """ if self.single_head: + # TODO: Cache the results if this is expensive? + return Coreset.merge(self.coreset) + else: if len(self.coreset.iterator) > 0: - train_iterator = Coreset.merge(self.coreset) - batch_size = train_iterator.provide_label.shape[0] if (self.batch_size is None) else self.batch_size - priors = self.bayesian_model.posteriors - print("Training single-head prediction model") - prediction_model.train( - train_iterator=train_iterator, - validation_iterator=None, - head=0, - epochs=self.num_epochs, - batch_size=batch_size, - priors=priors) + return self.coreset.iterator[task_id] else: - print("Using main model as prediction model") - prediction_model = self.bayesian_model + return None + + def fine_tune(self, task_id): + """ + Fine tune the latest trained model using the coreset(s) + :param task_id: the task id + :return: the fine tuned prediction model + """ + train_iterator = self.get_coreset(task_id) + + if train_iterator is None: + print(f"Empty coreset: Using main model as prediction model for task {task_id}") + return self.bayesian_model + + train_iterator.reset() + batch_size = train_iterator.provide_label[0].shape[0] + # prediction_model = self.prediction_models[task_id] + prediction_model = BayesianNN(**self.model_params) + + print(f"Fine tuning prediction model for task {task_id}") + prediction_model.train( + train_iterator=train_iterator, + validation_iterator=None, + head=task_id, + epochs=self.num_epochs, + batch_size=batch_size, + priors=self.bayesian_model.posteriors) + return prediction_model + + def get_scores(self): + scores = [] + # TODO: different learning rate and max iter here? for task_id, test_iterator in self.test_iterators.items(): test_iterator.reset() - if not self.single_head: - # TODO: What's the validation data here? - # TODO: different learning rate and max iter here? - if len(self.coreset.iterator) > 0: - print("Training multi-head prediction model") - prediction_model.train( - train_iterator=self.coreset.iterator, - validation_iterator=None, - head=task_id, - epochs=self.num_epochs, - batch_size=self.batch_size, - priors=self.bayesian_model.posteriors) - else: - print(f"Using main model as prediction model for task {task_id}") - prediction_model = self.bayesian_model head = 0 if self.single_head else task_id + prediction_model = self.fine_tune(task_id) print(f"Generating predictions for task {task_id}") predictions = prediction_model.prediction_prob(test_iterator, head) predicted_means = np.mean(predictions, axis=0) predicted_labels = np.argmax(predicted_means, axis=1) test_labels = test_iterator.label[0][1].asnumpy() - cur_acc = len(np.where(np.abs(predicted_labels - test_labels) < 1e-10)[0]) * 1.0 / test_labels.shape[0] - acc.append(cur_acc) - return acc - - @staticmethod - def concatenate_results(score, all_score): - if all_score.size == 0: - all_score = np.reshape(score, (1, -1)) - else: - new_arr = np.empty((all_score.shape[0], all_score.shape[1] + 1)) - new_arr[:] = np.nan - new_arr[:, :-1] = all_score - all_score = np.vstack((new_arr, score)) - return all_score + score = len(np.where(np.abs(predicted_labels - test_labels) < 1e-10)[0]) * 1.0 / test_labels.shape[0] + scores.append(score) + return scores + + +def concatenate_results(score, all_score): + if all_score.size == 0: + all_score = np.reshape(score, (1, -1)) + else: + new_arr = np.empty((all_score.shape[0], all_score.shape[1] + 1)) + new_arr[:] = np.nan + new_arr[:, :-1] = all_score + all_score = np.vstack((new_arr, score)) + return all_score diff --git a/examples/variational_continual_learning/mlp.py b/examples/variational_continual_learning/mlp.py new file mode 100644 index 0000000..656beb0 --- /dev/null +++ b/examples/variational_continual_learning/mlp.py @@ -0,0 +1,64 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ============================================================================== + +from mxnet.gluon import Block +from mxnet.gluon.contrib.nn import Concurrent +from mxnet.gluon.nn import Dense, Sequential + + +class MLPSequential(Sequential): + def __init__(self, prefix, network_shape, single_head, **kwargs): + super().__init__(prefix=prefix, **kwargs) + + self.network_shape = network_shape + self.single_head = single_head + + with self.name_scope(): + for i in range(1, len(self.network_shape) - 1): + self.add(Dense(self.network_shape[i], activation="relu", in_units=self.network_shape[i - 1])) + + # Last layer for classification - one per head for multi-head networks + if self.single_head: + self.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) + else: + for label_shape in self.network_shape[-1]: + self.add(Dense(label_shape, in_units=self.network_shape[-2])) + + +class MLP(Block): + def __init__(self, prefix, network_shape, single_head, **kwargs): + super().__init__(prefix=prefix, **kwargs) + + self.single_head = single_head + + with self.name_scope(): + self.hidden = Sequential() + for i in range(1, len(network_shape) - 1): + self.hidden.add(Dense(network_shape[i], activation="relu", in_units=network_shape[i - 1])) + + if single_head: + self.head = Dense(network_shape[-1], in_units=network_shape[-2]) + else: + self.concurrent = Concurrent() + for label_shape in network_shape[-1]: + self.concurrent.add(Dense(label_shape, in_units=network_shape[-2])) + + def forward(self, x): + for i in range(len(self.hidden)): + x = self.hidden[i](x) + + if self.single_head: + return self.head(x) + else: + return tuple(map(lambda h: h(x), self.concurrent)) diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index e0ce552..8b498d4 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -12,10 +12,8 @@ # permissions and limitations under the License. # ============================================================================== import mxnet as mx -from mxnet.gluon import Trainer, ParameterDict, Block -from mxnet.gluon.contrib.nn import Concurrent +from mxnet.gluon import Trainer, ParameterDict from mxnet.gluon.loss import SoftmaxCrossEntropyLoss -from mxnet.gluon.nn import HybridSequential, Dense, Sequential from mxnet.initializer import Xavier from mxnet.metric import Accuracy @@ -26,6 +24,8 @@ StochasticVariationalInference, VariationalPosteriorForwardSampling from abc import ABC, abstractmethod +from .mlp import MLP + class BaseNN(ABC): prefix = None @@ -56,79 +56,32 @@ def single_head(self): def num_heads(self): return 1 if self.single_head else len(self.network_shape[-1]) - class MLP(Block): - def __init__(self, prefix, network_shape, single_head, **kwargs): - super().__init__(prefix=prefix, **kwargs) - # self.hidden_layers = [] - self.single_head = single_head - - with self.name_scope(): - self.hidden = Sequential() - for i in range(1, len(network_shape) - 1): - self.hidden.add(Dense(network_shape[i], activation="relu", in_units=network_shape[i - 1])) - - # for i in range(1, len(network_shape) - 1): - # self.hidden_layers.append( - # Dense(network_shape[i], activation="relu", in_units=network_shape[i - 1])) - - self.dense1 = Dense(64, activation="relu") - - if single_head: - self.head = Dense(network_shape[-1], in_units=network_shape[-2]) - else: - self.concurrent = Concurrent() - # self.heads = [] - for label_shape in network_shape[-1]: - self.concurrent.add(Dense(label_shape, in_units=network_shape[-2])) - # self.heads.append(Dense(label_shape, in_units=network_shape[-2])) - - def forward(self, x): - for i in range(len(self.hidden)): - x = self.hidden[i](x) - - # for layer in self.hidden_layers: - # x = layer(x) - - if self.single_head: - return self.head(x) - else: - return tuple(map(lambda h: h(x), self.concurrent)) - # return tuple(map(lambda h: h(x), self.heads)) - def create_net(self): # Create net - self.net = self.MLP(self.prefix, self.network_shape, self.single_head) + self.net = MLP(self.prefix, self.network_shape, self.single_head) self.net.initialize(Xavier(magnitude=2.34), ctx=self.ctx) - # self.net = HybridSequential(prefix=self.prefix) - # with self.net.name_scope(): - # for i in range(1, len(self.network_shape) - 1): - # self.net.add(Dense(self.network_shape[i], activation="relu", in_units=self.network_shape[i - 1])) - # - # # Last layer for classification - one per head for multi-head networks - # if self.single_head: - # self.net.add(Dense(self.network_shape[-1], in_units=self.network_shape[-2])) - # else: - # for label_shape in self.network_shape[-1]: - # self.net.add(Dense(label_shape, in_units=self.network_shape[-2])) - # self.net.initialize(Xavier(magnitude=2.34), ctx=self.ctx) - def forward(self, data): # Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height) data = mx.nd.flatten(data).as_in_context(self.ctx) output = self.net(data) return output - def evaluate_accuracy(self, data_iterator): + def evaluate_accuracy(self, data_iterator, head=0): """ Evaluate the accuracy of the model on the given data iterator :param data_iterator: data iterator + :param head: the head of the network (for multi-head models) :return: accuracy :rtype: float """ acc = Accuracy() for i, batch in enumerate(data_iterator): - output = self.forward(batch.data[0]) + if self.single_head: + output = self.forward(batch.data[0]) + else: + output = self.forward(batch.data[0])[head] + labels = batch.label[0].as_in_context(self.ctx) predictions = mx.nd.argmax(output, axis=1) acc.update(preds=predictions, labels=labels) @@ -174,8 +127,8 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p train_iterator.reset() validation_iterator.reset() - train_accuracy = self.evaluate_accuracy(train_iterator) - validation_accuracy = self.evaluate_accuracy(validation_iterator) + train_accuracy = self.evaluate_accuracy(train_iterator, head=head) + validation_accuracy = self.evaluate_accuracy(validation_iterator, head=head) self.print_status(epoch + 1, cumulative_loss / num_examples, train_accuracy, validation_accuracy) @@ -286,6 +239,10 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p self.inference.params.param_dict[mean_prior]._grad_req = 'null' self.inference.params.param_dict[variance_prior]._grad_req = 'null' + if self.single_head: + print(f"Running single-headed inference") + else: + print(f"Running multi-headed inference for head {head}") self.inference.run(max_iter=self.max_iter, learning_rate=self.learning_rate, verbose=False, callback=self.print_status, **kwargs) @@ -318,13 +275,11 @@ def prediction_prob(self, test_iter, head): data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) - if self.single_head: - r = self.model.r - else: - r = getattr(self.model, f'r{head}') + r = self.model.r if self.single_head else getattr(self.model, f'r{head}') + + print(data.shape) - prediction_inference = VariationalPosteriorForwardSampling( - 10, [self.model.x], self.inference, [r]) + prediction_inference = VariationalPosteriorForwardSampling(10, [self.model.x], self.inference, [r]) res = prediction_inference.run(x=mx.nd.array(data)) return res[0].asnumpy() diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 8cd3b9a..0f23001 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -90,20 +90,20 @@ def plot(title, experiments, num_tasks): optimizer = 'adam' experiment_parameters = ( - dict( - coreset=Vanilla(), - learning_rate=learning_rate, - optimizer=optimizer, - network_shape=network_shape, - num_epochs=num_epochs, - single_head=single_head), # dict( - # coreset=Random(coreset_size=coreset_size), + # coreset=Vanilla(), # learning_rate=learning_rate, # optimizer=optimizer, # network_shape=network_shape, # num_epochs=num_epochs, # single_head=single_head), + dict( + coreset=Random(coreset_size=coreset_size), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=single_head), # dict( # coreset=KCenter(coreset_size=coreset_size), # learning_rate=learning_rate, From c20a9ede6dd5690042741a69c3418f118627c226 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Fri, 23 Nov 2018 11:09:42 +0000 Subject: [PATCH 11/32] Bug fix in function evaluation --- mxfusion/components/functions/function_evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mxfusion/components/functions/function_evaluation.py b/mxfusion/components/functions/function_evaluation.py index c1fa1f4..3b48164 100644 --- a/mxfusion/components/functions/function_evaluation.py +++ b/mxfusion/components/functions/function_evaluation.py @@ -89,7 +89,7 @@ def eval_RT(self, F, always_return_tuple=False, **input_kws): results.append(r) if isinstance(results[0], (list, tuple)): # if the function has multiple output variables. - results = [F.concat([r[i] for r in results], dim=0) for + results = [F.concat(*[r[i] for r in results], dim=0) for i in range(len(results[0]))] else: results = F.concat(*results, dim=0) From 6110fadfa0d7c3d8b472e8d7c8d8d1d5a33e9d41 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Fri, 23 Nov 2018 23:50:00 +0000 Subject: [PATCH 12/32] Added some extra print statements --- .../experiment.py | 32 ++++++++------ .../variational_continual_learning/models.py | 44 +++++++++++++------ .../variational_continual_learning.py | 3 +- 3 files changed, 52 insertions(+), 27 deletions(-) diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index 6853a5e..d4a4b70 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -20,7 +20,7 @@ class Experiment: def __init__(self, network_shape, num_epochs, learning_rate, optimizer, data_generator, - coreset, batch_size, single_head, ctx): + coreset, batch_size, single_head, ctx, verbose): self.network_shape = network_shape self.original_network_shape = network_shape # Only used when resetting self.num_epochs = num_epochs @@ -31,6 +31,7 @@ def __init__(self, network_shape, num_epochs, learning_rate, optimizer, data_gen self.batch_size = batch_size self.single_head = single_head self.context = ctx + self.verbose = verbose # The following are to keep lint happy: self.overall_accuracy = None @@ -48,7 +49,8 @@ def model_params(self): learning_rate=self.learning_rate, optimizer=self.optimizer, max_iter=self.num_epochs, - ctx=self.context + ctx=self.context, + verbose=self.verbose ) def reset(self): @@ -84,7 +86,7 @@ def new_task(self, task): # # We'll keep the prediction model for each task since they'll get reused # self.prediction_models[task.task_id] = BayesianNN(**self.model_params) - def run(self, verbose=True): + def run(self): self.reset() # To begin with, set the priors to None. @@ -112,10 +114,10 @@ def run(self, verbose=True): validation_iterator=task.test_iterator, head=head, epochs=5, - batch_size=batch_size, - verbose=verbose) + batch_size=batch_size) priors = self.vanilla_model.net.collect_params() + print(f"Number of variables in priors: {len(priors.items())}") train_iterator.reset() self.new_task(task) @@ -128,11 +130,11 @@ def run(self, verbose=True): head=head, epochs=self.num_epochs, batch_size=self.batch_size, - priors=priors, - verbose=verbose) + priors=priors) # Set the priors for the next round of inference to be the current posteriors priors = self.bayesian_model.posteriors + print(f"Number of variables in priors: {len(priors)}") # Incorporate coreset data and make prediction acc = self.get_scores() @@ -161,25 +163,29 @@ def fine_tune(self, task_id): :param task_id: the task id :return: the fine tuned prediction model """ - train_iterator = self.get_coreset(task_id) + coreset_iterator = self.get_coreset(task_id) - if train_iterator is None: + if coreset_iterator is None: print(f"Empty coreset: Using main model as prediction model for task {task_id}") return self.bayesian_model - train_iterator.reset() - batch_size = train_iterator.provide_label[0].shape[0] + coreset_iterator.reset() + batch_size = coreset_iterator.provide_label[0].shape[0] # prediction_model = self.prediction_models[task_id] prediction_model = BayesianNN(**self.model_params) + priors = self.bayesian_model.posteriors + print(f"Number of variables in priors: {len(priors)}") + print(f"Fine tuning prediction model for task {task_id}") prediction_model.train( - train_iterator=train_iterator, + train_iterator=coreset_iterator, validation_iterator=None, head=task_id, epochs=self.num_epochs, batch_size=batch_size, - priors=self.bayesian_model.posteriors) + priors=priors) + return prediction_model def get_scores(self): diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 8b498d4..b0a0944 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -30,7 +30,7 @@ class BaseNN(ABC): prefix = None - def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): + def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx, verbose): self.model = None self.network_shape = network_shape self.learning_rate = learning_rate @@ -43,6 +43,7 @@ def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): self.inference = None self.create_net() self.loss = SoftmaxCrossEntropyLoss() + self.verbose = verbose @property def single_head(self): @@ -88,7 +89,7 @@ def evaluate_accuracy(self, data_iterator, head=0): return acc.get()[1] @abstractmethod - def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None, verbose=True): + def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None): raise NotImplementedError def prediction_prob(self, test_iter, task_idx): @@ -109,7 +110,7 @@ def print_status(epoch, loss, train_accuracy=float("nan"), validation_accuracy=f class VanillaNN(BaseNN): prefix = 'vanilla_' - def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None, verbose=True): + def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None): trainer = Trainer(self.net.collect_params(), self.optimizer, dict(learning_rate=self.learning_rate)) num_examples = 0 @@ -135,12 +136,12 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p class BayesianNN(BaseNN): prefix = 'bayesian_' - def __init__(self, network_shape, learning_rate, optimizer, max_iter, ctx): - super().__init__(network_shape, learning_rate, optimizer, max_iter, ctx) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.create_model() def create_model(self): - self.model = Model(verbose=True) + self.model = Model(verbose=self.verbose) self.model.N = Variable() self.model.f = MXFusionGluonFunction(self.net, num_outputs=self.num_heads, broadcastable=False) self.model.x = Variable(shape=(self.model.N, self.network_shape[0])) @@ -151,14 +152,15 @@ def create_model(self): log_prob=self.model.r, shape=(self.model.N, 1), num_classes=self.network_shape[-1]) self.create_prior_variables(self.model.r) else: + r = self.model.f(self.model.x) for head, label_shape in enumerate(self.network_shape[-1]): - r = self.model.f(self.model.x)[head] if self.num_heads > 1 else self.model.f(self.model.x) - setattr(self.model, f'r{head}', r) - y = Categorical.define_variable(log_prob=r, shape=(self.model.N, 1), num_classes=label_shape) + rh = r[head] if self.num_heads > 1 else r + setattr(self.model, f'r{head}', rh) + y = Categorical.define_variable(log_prob=rh, shape=(self.model.N, 1), num_classes=label_shape) setattr(self.model, f'y{head}', y) # TODO the statement below could probably be done only for the first head, since they all share the same # factor parameters - self.create_prior_variables(r) + self.create_prior_variables(rh) def create_prior_variables(self, r): for v in r.factor.parameters.values(): @@ -183,7 +185,7 @@ def get_net_parameters(self, head): return r.factor.parameters # noinspection PyUnresolvedReferences - def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None, verbose=True): + def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None): for i, batch in enumerate(train_iterator): if i > 0: raise NotImplementedError("Currently not supported for more than one batch of data. " @@ -192,8 +194,11 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) labels = mx.nd.expand_dims(batch.label[0], axis=-1).as_in_context(self.ctx) + if self.verbose: + print(f"Data shape {data.shape}") + # pass some data to initialise the net - # self.net(data[:1]) + self.net(data[:1]) # TODO: Would rather have done this before! # self.create_model() @@ -204,6 +209,11 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p else: observed = [self.model.x, getattr(self.model, f"y{head}")] kwargs = {'x': data, f'y{head}': labels} + # observed = [self.model.x] + [getattr(self.model, f"y{h}") for h in range(self.num_heads)] + # kwargs = {'x': data, f'y{head}': labels} + # for h in range(self.num_heads): + # if h != head: + # kwargs[f"y{h}"] = None q = create_Gaussian_meanfield(model=self.model, observed=observed) alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed) @@ -250,6 +260,8 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p @property def posteriors(self): q = self.inference.inference_algorithm.posterior + + # TODO: don't convert to numpy arrays posteriors = dict() if self.single_head: for v_name, v in self.model.r.factor.parameters.items(): @@ -261,6 +273,8 @@ def posteriors(self): posteriors[v.inherited_name + "_mean"] = self.inference.params[q[v.uuid].factor.mean].asnumpy() posteriors[v.inherited_name + "_variance"] = \ self.inference.params[q[v.uuid].factor.variance].asnumpy() + print(f"Head {head}, variable {v.inherited_name}, " + f"shape {posteriors[v.inherited_name + '_mean'].shape}") return posteriors # noinspection PyUnresolvedReferences @@ -275,9 +289,13 @@ def prediction_prob(self, test_iter, head): data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) + # pass some data to initialise the net + self.net(data[:1]) + r = self.model.r if self.single_head else getattr(self.model, f'r{head}') - print(data.shape) + if self.verbose: + print(f"Data shape {data.shape}") prediction_inference = VariationalPosteriorForwardSampling(10, [self.model.x], self.inference, [r]) res = prediction_inference.run(x=mx.nd.array(data)) diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 0f23001..6d80b87 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -60,6 +60,7 @@ def plot(title, experiments, num_tasks): # Load data data = mx.test_utils.get_mnist() input_dim = int(np.prod(data['train_data'][0].shape)) # Note the data will get flattened later + verbose = True # noinspection PyUnreachableCode if True: @@ -123,7 +124,7 @@ def plot(title, experiments, num_tasks): set_seeds() experiment = Experiment(batch_size=batch_size, data_generator=gen(data, batch_size=batch_size, tasks=tasks), - ctx=CTX, + ctx=CTX, verbose=verbose, **params) experiment.run() print(experiment.overall_accuracy) From e7a3ac6509c052ace40514bc37f1cc4ffc51a830 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Fri, 23 Nov 2018 23:51:12 +0000 Subject: [PATCH 13/32] Changed transpose property to function. This stops the debugger accidentally creating new variables on access --- mxfusion/components/variables/variable.py | 3 +-- testing/components/functions/operators_test.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mxfusion/components/variables/variable.py b/mxfusion/components/variables/variable.py index 4ec0f34..cc2f10f 100644 --- a/mxfusion/components/variables/variable.py +++ b/mxfusion/components/variables/variable.py @@ -264,7 +264,6 @@ def __pow__(self, y): from ..functions.operators import power return power(self, y) - @property - def T(self): + def transpose(self): from ..functions.operators import transpose return transpose(self) diff --git a/testing/components/functions/operators_test.py b/testing/components/functions/operators_test.py index 196c078..6642781 100644 --- a/testing/components/functions/operators_test.py +++ b/testing/components/functions/operators_test.py @@ -93,7 +93,7 @@ def test_operators_variable_builtins(self, mxf_operator, mxnet_operator, inputs, elif case == "pow": m2.r = v12 ** v22 elif case == "transpose": - m2.r = v12.T + m2.r = v12.transpose() vs2 = [v for v in m2.r.factor.inputs] variables_rt2 = {v[1].uuid: inputs[i] for i,v in enumerate(vs2)} p_eval = m2.r.factor.eval(mx.nd, variables=variables_rt2) From ec39612ffa226f4b3decaf3d4d9dbaecc891e900 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Mon, 26 Nov 2018 12:11:19 +0000 Subject: [PATCH 14/32] Experimenting with multi-head methods --- examples/variational_continual_learning/models.py | 8 ++++++-- mxfusion/inference/meanfield.py | 15 ++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index b0a0944..fc5889b 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -205,17 +205,21 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p if self.single_head: observed = [self.model.x, self.model.y] + ignored = None kwargs = dict(y=labels, x=data) else: observed = [self.model.x, getattr(self.model, f"y{head}")] - kwargs = {'x': data, f'y{head}': labels} + y_other = [getattr(self.model, f"y{h}") for h in range(self.num_heads) if h != head] + r_other = [getattr(self.model, f"r{h}") for h in range(self.num_heads) if h != head] + ignored = y_other + kwargs = {'x': data, f'y{head}': labels, 'ignored': y_other + r_other} # observed = [self.model.x] + [getattr(self.model, f"y{h}") for h in range(self.num_heads)] # kwargs = {'x': data, f'y{head}': labels} # for h in range(self.num_heads): # if h != head: # kwargs[f"y{h}"] = None - q = create_Gaussian_meanfield(model=self.model, observed=observed) + q = create_Gaussian_meanfield(model=self.model, ignored=ignored, observed=observed) alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed) self.inference = GradBasedInference(inference_algorithm=alg, grad_loop=BatchInferenceLoop()) self.inference.initialize(**kwargs) diff --git a/mxfusion/inference/meanfield.py b/mxfusion/inference/meanfield.py index 537bec1..621b8b6 100644 --- a/mxfusion/inference/meanfield.py +++ b/mxfusion/inference/meanfield.py @@ -21,22 +21,27 @@ from ..common.config import get_default_dtype -def create_Gaussian_meanfield(model, observed, dtype=None): +def create_Gaussian_meanfield(model, observed, ignored=None, dtype=None): """ - Create the Meanfield posterior for Variational Inference. + Create the mean-field posterior for Variational Inference. - :param model_graph: the definition of the probabilistic model - :type model_graph: Model + :param model: the definition of the probabilistic model + :type model: Model :param observed: A list of observed variables :type observed: [Variable] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] :returns: the resulting posterior representation + :param dtype: Data type of the random variable (float32 or float64) :rtype: Posterior """ dtype = get_default_dtype() if dtype is None else dtype observed = variables_to_UUID(observed) + ignored = variables_to_UUID(ignored) if ignored is not None else [] q = Posterior(model) for v in model.variables.values(): - if v.type == VariableType.RANDVAR and v not in observed: + if v.type == VariableType.RANDVAR and v not in observed and v not in ignored: mean = Variable(shape=v.shape) variance = Variable(shape=v.shape, transformation=PositiveTransformation()) From a25646d7997b311b5029b747557ac97141f1e738 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Tue, 18 Dec 2018 18:13:43 +0000 Subject: [PATCH 15/32] Added version of grad based inference for DataLoader objects Added custom print function to minibatch loop --- mxfusion/inference/__init__.py | 2 +- mxfusion/inference/grad_based_inference.py | 58 ++++++++++++++++++++++ mxfusion/inference/minibatch_loop.py | 9 +++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/mxfusion/inference/__init__.py b/mxfusion/inference/__init__.py index e89b8f3..3adf37f 100644 --- a/mxfusion/inference/__init__.py +++ b/mxfusion/inference/__init__.py @@ -41,7 +41,7 @@ from .minibatch_loop import MinibatchInferenceLoop from .meanfield import create_Gaussian_meanfield from .forward_sampling import ForwardSampling, VariationalPosteriorForwardSampling, ForwardSamplingAlgorithm -from .grad_based_inference import GradBasedInference +from .grad_based_inference import GradBasedInference, GradIteratorBasedInference from .variational import StochasticVariationalInference from .inference_parameters import InferenceParameters from .score_function import ScoreFunctionInference, ScoreFunctionRBInference diff --git a/mxfusion/inference/grad_based_inference.py b/mxfusion/inference/grad_based_inference.py index 236effd..fa2640e 100644 --- a/mxfusion/inference/grad_based_inference.py +++ b/mxfusion/inference/grad_based_inference.py @@ -15,6 +15,7 @@ from .inference import Inference from .batch_loop import BatchInferenceLoop +from .minibatch_loop import MinibatchInferenceLoop class GradBasedInference(Inference): @@ -92,3 +93,60 @@ def run(self, optimizer='adam', learning_rate=1e-3, max_iter=2000, infr_executor=infr, data=data, param_dict=self.params.param_dict, ctx=self.mxnet_context, optimizer=optimizer, learning_rate=learning_rate, max_iter=max_iter, verbose=verbose, callback=callback) + + +class GradIteratorBasedInference(Inference): + """ + An inference method consists of a few components: the applied inference algorithm, the model definition + (optionally a definition of posterior approximation), the inference parameters. + + :param inference_algorithm: The applied inference algorithm + :type inference_algorithm: InferenceAlgorithm + :param grad_loop: The reference to the main loop of gradient optimization + :type grad_loop: GradLoop + :param constants: Specify a list of model variables as constants + :type constants: {Variable: mxnet.ndarray} + :param hybridize: Whether to hybridize the MXNet Gluon block of the inference method. + :type hybridize: boolean + :param dtype: data type for internal numerical representation + :type dtype: {numpy.float64, numpy.float32, 'float64', 'float32'} + :param context: The MXNet context + :type context: {mxnet.cpu or mxnet.gpu} + """ + def __init__(self, inference_algorithm, grad_loop=None, constants=None, + hybridize=False, dtype=None, context=None): + if grad_loop is None: + grad_loop = MinibatchInferenceLoop() + super().__init__( + inference_algorithm=inference_algorithm, constants=constants, + hybridize=hybridize, dtype=dtype, context=context) + self._grad_loop = grad_loop + + def run(self, data, optimizer='adam', learning_rate=1e-3, max_iter=2000, verbose=False, callback=None, **kwargs): + """ + Run the inference method. + + :param optimizer: the choice of optimizer (default: 'adam') + :type optimizer: str + :param learning_rate: the learning rate of the gradient optimizer (default: 0.001) + :type learning_rate: float + :param max_iter: the maximum number of iterations of gradient optimization + :type max_iter: int + :param verbose: whether to print per-iteration messages. + :type verbose: boolean + :param callback: Callback function for custom print statements + :type callback: func + :param kwargs: The keyword arguments specify the data for inferences. The key of each argument is the name of + the corresponding variable in model definition and the value of the argument is the data in numpy array format. + """ + # data = [kwargs[v] for v in self.observed_variable_names] + + if not self._initialized: + raise ValueError("This inference method must be manually initialised, since we don't know the shapes" + "ahead of time.") + + infr = self.create_executor() + return self._grad_loop.run( + infr_executor=infr, data=data, param_dict=self.params.param_dict, + ctx=self.mxnet_context, optimizer=optimizer, + learning_rate=learning_rate, max_iter=max_iter, verbose=verbose, callback=callback) diff --git a/mxfusion/inference/minibatch_loop.py b/mxfusion/inference/minibatch_loop.py index 1ae4754..d296928 100644 --- a/mxfusion/inference/minibatch_loop.py +++ b/mxfusion/inference/minibatch_loop.py @@ -40,7 +40,7 @@ def __init__(self, batch_size=100, rv_scaling=None): if rv_scaling is not None else rv_scaling def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', - learning_rate=1e-3, max_iter=1000, verbose=False): + learning_rate=1e-3, max_iter=1000, verbose=False, callback=None): """ :param infr_executor: The MXNet function that computes the training objective. :type infr_executor: MXNet Gluon Block @@ -56,6 +56,8 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', :type learning_rate: float :param max_iter: the maximum number of iterations of gradient optimization :type max_iter: int + :param callback: Callback function for custom print statements + :type callback: func :param verbose: whether to print per-iteration messages. :type verbose: boolean """ @@ -86,4 +88,7 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', L_e += loss.asscalar() / self.batch_size n_batches += 1 if verbose: - print('epoch-loss: {} '.format(L_e / n_batches)) + if callback is None: + print('epoch-loss: {} '.format(L_e / n_batches)) + else: + callback(e, L_e) From 32c66e237627818d56a2620515f5bcae1795312c Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Thu, 21 Feb 2019 18:14:11 +0000 Subject: [PATCH 16/32] Changing print statements for 3.4 compatability --- .../variational_continual_learning/experiment.py | 12 ++++++------ examples/variational_continual_learning/models.py | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index d4a4b70..31e038e 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -117,7 +117,7 @@ def run(self): batch_size=batch_size) priors = self.vanilla_model.net.collect_params() - print(f"Number of variables in priors: {len(priors.items())}") + # print("Number of variables in priors: {}".format(len(priors.items()))) train_iterator.reset() self.new_task(task) @@ -134,7 +134,7 @@ def run(self): # Set the priors for the next round of inference to be the current posteriors priors = self.bayesian_model.posteriors - print(f"Number of variables in priors: {len(priors)}") + # print("Number of variables in priors: {}".format(len(priors.items()))) # Incorporate coreset data and make prediction acc = self.get_scores() @@ -166,7 +166,7 @@ def fine_tune(self, task_id): coreset_iterator = self.get_coreset(task_id) if coreset_iterator is None: - print(f"Empty coreset: Using main model as prediction model for task {task_id}") + print("Empty coreset: Using main model as prediction model for task {}".format(task_id)) return self.bayesian_model coreset_iterator.reset() @@ -175,9 +175,9 @@ def fine_tune(self, task_id): prediction_model = BayesianNN(**self.model_params) priors = self.bayesian_model.posteriors - print(f"Number of variables in priors: {len(priors)}") + print("Number of variables in priors: {}".format(len(priors))) - print(f"Fine tuning prediction model for task {task_id}") + print("Fine tuning prediction model for task {}".format(task_id)) prediction_model.train( train_iterator=coreset_iterator, validation_iterator=None, @@ -198,7 +198,7 @@ def get_scores(self): head = 0 if self.single_head else task_id prediction_model = self.fine_tune(task_id) - print(f"Generating predictions for task {task_id}") + print("Generating predictions for task {}".format(task_id)) predictions = prediction_model.prediction_prob(test_iterator, head) predicted_means = np.mean(predictions, axis=0) predicted_labels = np.argmax(predicted_means, axis=1) diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index fc5889b..211ed06 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -103,8 +103,8 @@ def get_weights(self): @staticmethod def print_status(epoch, loss, train_accuracy=float("nan"), validation_accuracy=float("nan")): - print(f"Epoch {epoch:4d}. Loss: {loss:8.2f}, " - f"Train accuracy {train_accuracy:.3f}, Validation accuracy {validation_accuracy:.3f}") + print("Epoch {:4d}. Loss: {:8.2f}, Train accuracy {:.3f}, Validation accuracy {:.3f}" + .format(epoch, loss, train_accuracy, validation_accuracy)) class VanillaNN(BaseNN): @@ -195,7 +195,7 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p labels = mx.nd.expand_dims(batch.label[0], axis=-1).as_in_context(self.ctx) if self.verbose: - print(f"Data shape {data.shape}") + print("Data shape {}".format(data.shape)) # pass some data to initialise the net self.net(data[:1]) @@ -254,9 +254,9 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p self.inference.params.param_dict[variance_prior]._grad_req = 'null' if self.single_head: - print(f"Running single-headed inference") + print("Running single-headed inference") else: - print(f"Running multi-headed inference for head {head}") + print("Running multi-headed inference for head {}".format(head)) self.inference.run(max_iter=self.max_iter, learning_rate=self.learning_rate, verbose=False, callback=self.print_status, **kwargs) @@ -277,8 +277,8 @@ def posteriors(self): posteriors[v.inherited_name + "_mean"] = self.inference.params[q[v.uuid].factor.mean].asnumpy() posteriors[v.inherited_name + "_variance"] = \ self.inference.params[q[v.uuid].factor.variance].asnumpy() - print(f"Head {head}, variable {v.inherited_name}, " - f"shape {posteriors[v.inherited_name + '_mean'].shape}") + # print("Head {}, variable {}, shape {}" + # .format(head, v.inherited_name, posteriors[v.inherited_name + '_mean'].shape)) return posteriors # noinspection PyUnresolvedReferences From 33a1576df068654a946af812baf7fc6d0690b901 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Thu, 21 Feb 2019 18:14:28 +0000 Subject: [PATCH 17/32] Fixed bug in factor graph --- mxfusion/models/factor_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mxfusion/models/factor_graph.py b/mxfusion/models/factor_graph.py index 5f27fde..5e7bab2 100644 --- a/mxfusion/models/factor_graph.py +++ b/mxfusion/models/factor_graph.py @@ -424,7 +424,7 @@ def _clone(self, new_model, leaves=None): replication_function=lambda x: ('recursive', 'recursive')) setattr(new_model, v.name, new_leaf) else: - v.graph = new_model.graph + v.components_graph = new_model.components_graph for v in self.variables.values(): if v.name is not None: setattr(new_model, v.name, new_model[v.uuid]) From d2cac267e42d61c4e02334c54e32f1ff4bfd0e0e Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Fri, 22 Feb 2019 19:55:17 +0000 Subject: [PATCH 18/32] Fixes to ignored parts of the graph --- .../variational_continual_learning/models.py | 67 +++++++++++++------ 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 211ed06..738b166 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -22,6 +22,7 @@ from mxfusion.components.distributions import Normal, Categorical from mxfusion.inference import BatchInferenceLoop, create_Gaussian_meanfield, GradBasedInference, \ StochasticVariationalInference, VariationalPosteriorForwardSampling +from mxfusion.components.variables import add_sample_dimension from abc import ABC, abstractmethod from .mlp import MLP @@ -155,9 +156,9 @@ def create_model(self): r = self.model.f(self.model.x) for head, label_shape in enumerate(self.network_shape[-1]): rh = r[head] if self.num_heads > 1 else r - setattr(self.model, f'r{head}', rh) + setattr(self.model, 'r{}'.format(head), rh) y = Categorical.define_variable(log_prob=rh, shape=(self.model.N, 1), num_classes=label_shape) - setattr(self.model, f'y{head}', y) + setattr(self.model, 'y{}'.format(head), y) # TODO the statement below could probably be done only for the first head, since they all share the same # factor parameters self.create_prior_variables(rh) @@ -181,7 +182,7 @@ def get_net_parameters(self, head): if self.single_head: r = self.model.r else: - r = getattr(self.model, f'r{head}') + r = getattr(self.model, 'r{}'.format(head)) return r.factor.parameters # noinspection PyUnresolvedReferences @@ -200,27 +201,20 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p # pass some data to initialise the net self.net(data[:1]) - # TODO: Would rather have done this before! - # self.create_model() - if self.single_head: observed = [self.model.x, self.model.y] ignored = None kwargs = dict(y=labels, x=data) else: - observed = [self.model.x, getattr(self.model, f"y{head}")] - y_other = [getattr(self.model, f"y{h}") for h in range(self.num_heads) if h != head] - r_other = [getattr(self.model, f"r{h}") for h in range(self.num_heads) if h != head] - ignored = y_other - kwargs = {'x': data, f'y{head}': labels, 'ignored': y_other + r_other} - # observed = [self.model.x] + [getattr(self.model, f"y{h}") for h in range(self.num_heads)] - # kwargs = {'x': data, f'y{head}': labels} - # for h in range(self.num_heads): - # if h != head: - # kwargs[f"y{h}"] = None + observed = [self.model.x, getattr(self.model, "y{}".format(head))] + y_other = [getattr(self.model, "y{}".format(h)) for h in range(self.num_heads) if h != head] + r_other = [getattr(self.model, "r{}".format(h)) for h in range(self.num_heads) if h != head] + ignored = y_other + r_other + kwargs = {'x': data, 'y{}'.format(head): labels, 'ignored': dict((v.name, v) for v in ignored)} q = create_Gaussian_meanfield(model=self.model, ignored=ignored, observed=observed) - alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed) + alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed, + ignored=ignored) self.inference = GradBasedInference(inference_algorithm=alg, grad_loop=BatchInferenceLoop()) self.inference.initialize(**kwargs) @@ -296,13 +290,46 @@ def prediction_prob(self, test_iter, head): # pass some data to initialise the net self.net(data[:1]) - r = self.model.r if self.single_head else getattr(self.model, f'r{head}') + r = self.model.r if self.single_head else getattr(self.model, 'r{}'.format(head)) + y = self.model.y if self.single_head else getattr(self.model, 'y{}'.format(head)) + y_other = [getattr(self.model, "y{}".format(h)) for h in range(self.num_heads) if h != head] + r_other = [getattr(self.model, "r{}".format(h)) for h in range(self.num_heads) if h != head] + ignored = y_other + r_other if self.verbose: - print(f"Data shape {data.shape}") + print("Data shape {}".format(data.shape)) + + if len(ignored) > 0: + # Here we need to re-instantiate the ignored variables into the posterior if they don't exist + model = self.inference.inference_algorithm.model # .clone() + old_posterior = self.inference.inference_algorithm.posterior + new_posterior = old_posterior.clone(model=model) + + # Reattach the missing parts of the graph + new_posterior[r].set_prior(new_posterior[r.factor]) + new_posterior[r].factor.predecessors = [(k, new_posterior[v]) for k, v in r.factor.predecessors] + new_posterior[r].factor.successors = [(k, new_posterior[v]) for k, v in r.factor.successors] + new_posterior[y].set_prior(new_posterior[y.factor]) + new_posterior[y].factor.predecessors = [(k, new_posterior[v]) for k, v in y.factor.predecessors] + + # Set the posterior to be the new posterior + self.inference.inference_algorithm._extra_graphs[0] = new_posterior + else: + old_posterior = None + + prediction_inference = VariationalPosteriorForwardSampling( + num_samples=10, observed=[self.model.x], + inherited_inference=self.inference, + target_variables=[r], + ignored=ignored # dict((v.name, v) for v in ignored) + ) - prediction_inference = VariationalPosteriorForwardSampling(10, [self.model.x], self.inference, [r]) res = prediction_inference.run(x=mx.nd.array(data)) + + if old_posterior is not None: + # Set the posterior back to the old posterior + self.inference.inference_algorithm._extra_graphs[0] = old_posterior + return res[0].asnumpy() @staticmethod From ab266c3f5a26408248cd339d250b4fe320d3b0a3 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Fri, 22 Feb 2019 20:06:04 +0000 Subject: [PATCH 19/32] Support for ignored variables. These are variables that are not observed, but also will not be inferred. --- mxfusion/inference/forward_sampling.py | 72 +++++++++++++++++++--- mxfusion/inference/grad_based_inference.py | 2 + mxfusion/inference/inference_alg.py | 39 ++++++++++-- mxfusion/inference/meanfield.py | 10 ++- mxfusion/inference/variational.py | 22 +++++-- mxfusion/models/factor_graph.py | 8 ++- 6 files changed, 133 insertions(+), 20 deletions(-) diff --git a/mxfusion/inference/forward_sampling.py b/mxfusion/inference/forward_sampling.py index 9ddbb22..39dd443 100644 --- a/mxfusion/inference/forward_sampling.py +++ b/mxfusion/inference/forward_sampling.py @@ -50,8 +50,11 @@ def compute(self, F, variables): :rtype: mxnet.ndarray.ndarray.NDArray or mxnet.symbol.symbol.Symbol """ samples = self.model.draw_samples( - F=F, variables=variables, targets=self.target_variables, - num_samples=self.num_samples) + F=F, + variables=variables, # dict((k, v) for k, v in variables.items() if k not in self.ignored_variables), + targets=self.target_variables, + num_samples=self.num_samples, + ignored=self.ignored_variables) return samples @@ -80,24 +83,27 @@ class ForwardSampling(TransferInference): :type dtype: {numpy.float64, numpy.float32, 'float64', 'float32'} :param context: The MXNet context :type context: {mxnet.cpu or mxnet.gpu} + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ def __init__(self, num_samples, model, observed, var_tie, infr_params, target_variables=None, hybridize=False, constants=None, - dtype=None, context=None): + dtype=None, context=None, ignored=None): if target_variables is not None: target_variables = [v.uuid for v in target_variables if isinstance(v, Variable)] infr = ForwardSamplingAlgorithm( num_samples=num_samples, model=model, observed=observed, - target_variables=target_variables) + target_variables=target_variables, ignored=ignored) super(ForwardSampling, self).__init__( inference_algorithm=infr, var_tie=var_tie, infr_params=infr_params, constants=constants, hybridize=hybridize, dtype=dtype, context=context) -def merge_posterior_into_model(model, posterior, observed): +def merge_posterior_into_model(model, posterior, observed, ignored=None): """ Replace the prior distributions of a model with its variational posterior distributions. @@ -107,9 +113,15 @@ def merge_posterior_into_model(model, posterior, observed): :param posterior: Posterior :param observed: A list of observed variables :type observed: [Variable] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ new_model = model.clone() for lv in model.get_latent_variables(observed): + # Test if lv is in ignored + if lv in ignored: + continue v = posterior.extract_distribution_of(posterior[lv]) new_model.replace_subgraph(new_model[v], v) return new_model @@ -135,10 +147,13 @@ class VariationalPosteriorForwardSampling(ForwardSampling): :type dtype: {numpy.float64, numpy.float32, 'float64', 'float32'} :param context: The MXNet context :type context: {mxnet.cpu or mxnet.gpu} + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ def __init__(self, num_samples, observed, inherited_inference, target_variables=None, - hybridize=False, constants=None, dtype=None, context=None): + hybridize=False, constants=None, dtype=None, context=None, ignored=None): if not isinstance(inherited_inference.inference_algorithm, (StochasticVariationalInference, MAP)): raise InferenceError('inherited_inference needs to be a subclass of SVIInference or SVIMiniBatchInference.') @@ -147,11 +162,52 @@ def __init__(self, num_samples, observed, q = inherited_inference.inference_algorithm.posterior model_graph = merge_posterior_into_model( - m, q, observed=inherited_inference.observed_variables) + m, q, observed=inherited_inference.observed_variables, ignored=ignored) super(VariationalPosteriorForwardSampling, self).__init__( num_samples=num_samples, model=model_graph, observed=observed, var_tie={}, infr_params=inherited_inference.params, target_variables=target_variables, hybridize=hybridize, - constants=constants, dtype=dtype, context=context) + constants=constants, dtype=dtype, context=context, ignored=ignored) + + +class VariationalPosteriorForwardSampling2(ForwardSampling): + """ + The forward sampling method for variational inference. + + :param num_samples: the number of samples used in estimating the variational lower bound + :type num_samples: int + :param observed: A list of observed variables + :type observed: [Variable] + :param inherited_inference: the inference method of which the model and inference results are taken + :type inherited_inference: SVIInference or SVIMiniBatchInference + :param target_variables: (optional) the target variables to sample + :type target_variables: [Variable] + :param constants: Specify a list of model variables as constants + :type constants: {Variable: mxnet.ndarray} + :param hybridize: Whether to hybridize the MXNet Gluon block of the inference method. + :type hybridize: boolean + :param dtype: data type for internal numerical representation + :type dtype: {numpy.float64, numpy.float32, 'float64', 'float32'} + :param context: The MXNet context + :type context: {mxnet.cpu or mxnet.gpu} + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] + """ + def __init__(self, num_samples, observed, + inherited_algorithm, inherited_params, inherited_model, inherited_posterior, target_variables=None, + hybridize=False, constants=None, dtype=None, context=None, ignored=None): + if not isinstance(inherited_algorithm, (StochasticVariationalInference, MAP)): + raise InferenceError('inherited_inference needs to be a subclass of SVIInference or SVIMiniBatchInference.') + + model_graph = merge_posterior_into_model( + inherited_model, inherited_posterior, observed=observed, ignored=ignored) + + super().__init__( + num_samples=num_samples, model=model_graph, + observed=observed, + var_tie={}, infr_params=inherited_params, + target_variables=target_variables, hybridize=hybridize, + constants=constants, dtype=dtype, context=context, ignored=ignored) diff --git a/mxfusion/inference/grad_based_inference.py b/mxfusion/inference/grad_based_inference.py index fa2640e..1f3e23f 100644 --- a/mxfusion/inference/grad_based_inference.py +++ b/mxfusion/inference/grad_based_inference.py @@ -86,6 +86,8 @@ def run(self, optimizer='adam', learning_rate=1e-3, max_iter=2000, the corresponding variable in model definition and the value of the argument is the data in numpy array format. """ data = [kwargs[v] for v in self.observed_variable_names] + ignored = [kwargs['ignored'][v] for v in self.observed_variable_names + if 'ignored' in kwargs and v in kwargs['ignored']] self.initialize(**kwargs) infr = self.create_executor() diff --git a/mxfusion/inference/inference_alg.py b/mxfusion/inference/inference_alg.py index 8ed624d..ebacf59 100644 --- a/mxfusion/inference/inference_alg.py +++ b/mxfusion/inference/inference_alg.py @@ -102,6 +102,9 @@ class InferenceAlgorithm(ABC): :param extra_graphs: a list of extra FactorGraph used in the inference algorithm. :type extra_graphs: [FactorGraph] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ def replicate_self(self, model, extra_graphs=None): @@ -115,8 +118,7 @@ def replicate_self(self, model, extra_graphs=None): replicant._observed_names = [v.name for v in observed] return replicant - - def __init__(self, model, observed, extra_graphs=None): + def __init__(self, model, observed, extra_graphs=None, ignored=None): self._model_graph = model self._extra_graphs = extra_graphs if extra_graphs is not None else [] self._graphs = [model] if extra_graphs is None else \ @@ -124,6 +126,10 @@ def __init__(self, model, observed, extra_graphs=None): self._observed = set(observed) self._observed_uuid = variables_to_UUID(observed) self._observed_names = [v.name for v in observed] + ignored = ignored or [] + self._ignored = set(ignored) + self._ignored_uuid = variables_to_UUID(ignored) + self._ignored_names = [v.name for v in ignored] @property def observed_variables(self): @@ -146,6 +152,28 @@ def observed_variable_names(self): """ return self._observed_names + @property + def ignored_variables(self): + """ + The ignored variables in this inference algorithm. + """ + return self._ignored + + @property + def ignored_variable_UUIDs(self): + """ + The UUIDs of the ignored variables in this inference algorithm. + """ + return self._ignored_uuid + + @property + def ignored_variable_names(self): + """ + The names (if exist) of the ignored variables in this inference algorithm. + """ + return self._ignored_names + + @property def model(self): """ @@ -260,12 +288,15 @@ class SamplingAlgorithm(InferenceAlgorithm): :param extra_graphs: a list of extra FactorGraph used in the inference algorithm. :type extra_graphs: [FactorGraph] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ def __init__(self, model, observed, num_samples=1, target_variables=None, - extra_graphs=None): + extra_graphs=None, ignored=None): super(SamplingAlgorithm, self).__init__( - model=model, observed=observed, extra_graphs=extra_graphs) + model=model, observed=observed, extra_graphs=extra_graphs, ignored=ignored) self.num_samples = num_samples self.target_variables = target_variables diff --git a/mxfusion/inference/meanfield.py b/mxfusion/inference/meanfield.py index 621b8b6..02f8337 100644 --- a/mxfusion/inference/meanfield.py +++ b/mxfusion/inference/meanfield.py @@ -38,12 +38,18 @@ def create_Gaussian_meanfield(model, observed, ignored=None, dtype=None): """ dtype = get_default_dtype() if dtype is None else dtype observed = variables_to_UUID(observed) - ignored = variables_to_UUID(ignored) if ignored is not None else [] + ignored = variables_to_UUID(ignored or []) q = Posterior(model) for v in model.variables.values(): if v.type == VariableType.RANDVAR and v not in observed and v not in ignored: mean = Variable(shape=v.shape) variance = Variable(shape=v.shape, transformation=PositiveTransformation()) - q[v].set_prior(Normal(mean=mean, variance=variance, dtype=dtype)) + prior = Normal(mean=mean, variance=variance, dtype=dtype) + q[v].set_prior(prior) + + # setting a name for the priors so that cloning posteriors works + if not v.name and v.inherited_name: + setattr(q, "{}_prior".format(v.inherited_name), prior) + setattr(q, v.inherited_name, q[v]) return q diff --git a/mxfusion/inference/variational.py b/mxfusion/inference/variational.py index a438763..44c6540 100644 --- a/mxfusion/inference/variational.py +++ b/mxfusion/inference/variational.py @@ -28,11 +28,14 @@ class VariationalInference(InferenceAlgorithm): :param posterior: Posterior :param observed: A list of observed variables :type observed: [Variable] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ - def __init__(self, model, posterior, observed): + def __init__(self, model, posterior, observed, ignored=None): super(VariationalInference, self).__init__( - model=model, observed=observed, extra_graphs=[posterior]) + model=model, observed=observed, extra_graphs=[posterior], ignored=ignored) @property def posterior(self): @@ -84,10 +87,13 @@ class StochasticVariationalInference(VariationalInference): :param posterior: Posterior :param observed: A list of observed variables :type observed: [Variable] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ - def __init__(self, num_samples, model, posterior, observed): + def __init__(self, num_samples, model, posterior, observed, ignored=None): super(StochasticVariationalInference, self).__init__( - model=model, posterior=posterior, observed=observed) + model=model, posterior=posterior, observed=observed, ignored=ignored) self.num_samples = num_samples def compute(self, F, variables): @@ -105,6 +111,12 @@ def compute(self, F, variables): samples = self.posterior.draw_samples( F=F, variables=variables, num_samples=self.num_samples) variables.update(samples) - logL = self.model.log_pdf(F=F, variables=variables) + + if self.ignored_variables: + targets = self.posterior.variables + else: + targets = None + + logL = self.model.log_pdf(F=F, variables=variables, targets=targets) logL = logL - self.posterior.log_pdf(F=F, variables=variables) return -logL, -logL diff --git a/mxfusion/models/factor_graph.py b/mxfusion/models/factor_graph.py index 5e7bab2..e41eef8 100644 --- a/mxfusion/models/factor_graph.py +++ b/mxfusion/models/factor_graph.py @@ -235,7 +235,7 @@ def log_pdf(self, F, variables, targets=None): raise ModelSpecificationError("There is an object in the factor graph that isn't a factor." + "That shouldn't happen.") return logL - def draw_samples(self, F, variables, num_samples=1, targets=None): + def draw_samples(self, F, variables, num_samples=1, targets=None, ignored=None): """ Draw samples from the target variables of the Factor Graph. If the ``targets`` argument is None, draw samples from all the variables that are *not* in the conditional variables. If the ``targets`` argument is given, this method returns a list of samples of variables in the order of the target argument, otherwise it returns a dict of samples where the keys are the UUIDs of variables and the values are the samples. @@ -247,9 +247,13 @@ def draw_samples(self, F, variables, num_samples=1, targets=None): :type num_samples: int :param targets: a list of Variables to draw samples from. :type targets: [UUID] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] :returns: the samples of the target variables. :rtype: (MXNet NDArray or MXNet Symbol,) or {str(UUID): MXNet NDArray or MXNet Symbol} """ + ignored = ignored or () samples = {} for f in self.ordered_factors: if isinstance(f, FunctionEvaluation): @@ -267,6 +271,8 @@ def draw_samples(self, F, variables, num_samples=1, targets=None): continue elif any(known): raise InferenceError("Part of the outputs of the distribution " + f.__class__.__name__ + " has been observed!") + if any(v in ignored for (_, v) in f.outputs): + continue outcome_uuid = [v.uuid for _, v in f.outputs] outcome = f.draw_samples( F=F, num_samples=num_samples, variables=variables, always_return_tuple=True) From 8e1878ff0233efd9b12461918163afe6585cbfc6 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Fri, 22 Feb 2019 20:08:23 +0000 Subject: [PATCH 20/32] tidying --- .../variational_continual_learning.py | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 6d80b87..447522c 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -50,23 +50,28 @@ def plot(title, experiments, num_tasks): ax.legend() ax.set_title(title) - filename = f"vcl_{title}_{datetime.now().isoformat()[:-7]}.pdf" + filename = "vcl_{}_{}.pdf".format(title, datetime.now().isoformat()[:-7]) fig.savefig(filename, bbox_inches='tight') plt.show() plt.close() if __name__ == "__main__": + import warnings + warnings.filterwarnings("ignore", category=UserWarning) + # Load data data = mx.test_utils.get_mnist() input_dim = int(np.prod(data['train_data'][0].shape)) # Note the data will get flattened later - verbose = True + verbose = False # noinspection PyUnreachableCode if True: title = "Split MNIST" - tasks = ((0, 1), (2, 3)) # , (4, 5), (6, 7), (8, 9)) - num_epochs = 1 # 120 + tasks = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)) + num_epochs = 120 + # tasks = ((0, 1), (2, 3)) + # num_epochs = 1 # 120 batch_size = None gen = SplitTaskGenerator label_shape = 2 @@ -75,8 +80,10 @@ def plot(title, experiments, num_tasks): coreset_size = 40 else: title = "Permuted MNIST" - tasks = range(2) # range(10) - num_epochs = 1 # 100 + tasks = range(10) + num_epochs = 100 + # tasks = range(2) + # num_epochs = 1 batch_size = 256 gen = PermutedTaskGenerator label_shape = 10 @@ -91,13 +98,13 @@ def plot(title, experiments, num_tasks): optimizer = 'adam' experiment_parameters = ( - # dict( - # coreset=Vanilla(), - # learning_rate=learning_rate, - # optimizer=optimizer, - # network_shape=network_shape, - # num_epochs=num_epochs, - # single_head=single_head), + dict( + coreset=Vanilla(), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=single_head), dict( coreset=Random(coreset_size=coreset_size), learning_rate=learning_rate, @@ -105,13 +112,13 @@ def plot(title, experiments, num_tasks): network_shape=network_shape, num_epochs=num_epochs, single_head=single_head), - # dict( - # coreset=KCenter(coreset_size=coreset_size), - # learning_rate=learning_rate, - # optimizer=optimizer, - # network_shape=network_shape, - # num_epochs=num_epochs, - # single_head=single_head) + dict( + coreset=KCenter(coreset_size=coreset_size), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=single_head) ) experiments = [] From e8872fac14f5d38599f1b16eb2da095851f3003e Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Fri, 22 Feb 2019 23:27:47 +0000 Subject: [PATCH 21/32] more useful error message --- mxfusion/components/functions/mxfusion_gluon_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mxfusion/components/functions/mxfusion_gluon_function.py b/mxfusion/components/functions/mxfusion_gluon_function.py index 47f8a6d..434e488 100644 --- a/mxfusion/components/functions/mxfusion_gluon_function.py +++ b/mxfusion/components/functions/mxfusion_gluon_function.py @@ -200,7 +200,7 @@ def _override_block_parameters(self, input_kws): ctx = val.context ctx_list = param._ctx_map[ctx.device_typeid&1] if ctx.device_id >= len(ctx_list) or ctx_list[ctx.device_id] is None: - raise Exception + raise ValueError("Context id {} out of range {}".format(ctx.device_id, map(str, ctx_list))) dev_id = ctx_list[ctx.device_id] param._data[dev_id] = val else: From d15373b725d5b576056b4b147a2df40d9c48e6ba Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Fri, 22 Feb 2019 23:29:03 +0000 Subject: [PATCH 22/32] edit to error message --- mxfusion/components/functions/mxfusion_gluon_function.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mxfusion/components/functions/mxfusion_gluon_function.py b/mxfusion/components/functions/mxfusion_gluon_function.py index 434e488..8643e01 100644 --- a/mxfusion/components/functions/mxfusion_gluon_function.py +++ b/mxfusion/components/functions/mxfusion_gluon_function.py @@ -200,7 +200,8 @@ def _override_block_parameters(self, input_kws): ctx = val.context ctx_list = param._ctx_map[ctx.device_typeid&1] if ctx.device_id >= len(ctx_list) or ctx_list[ctx.device_id] is None: - raise ValueError("Context id {} out of range {}".format(ctx.device_id, map(str, ctx_list))) + raise ValueError("Context id {} out of range {}".format( + ctx.device_id, list(map(str, ctx_list)))) dev_id = ctx_list[ctx.device_id] param._data[dev_id] = val else: From 911795e0f354ee20c024b17af5c5ce3f1969f6f6 Mon Sep 17 00:00:00 2001 From: Tom Diethe Date: Sat, 23 Feb 2019 08:53:14 +0000 Subject: [PATCH 23/32] added figures from run on p3 --- .../vcl_Split MNIST_2019-02-23T00:14:49.pdf | Bin 0 -> 17210 bytes .../vcl_Split MNIST_2019-02-23T00:23:14.pdf | Bin 0 -> 15643 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/variational_continual_learning/vcl_Split MNIST_2019-02-23T00:14:49.pdf create mode 100644 examples/variational_continual_learning/vcl_Split MNIST_2019-02-23T00:23:14.pdf diff --git a/examples/variational_continual_learning/vcl_Split MNIST_2019-02-23T00:14:49.pdf b/examples/variational_continual_learning/vcl_Split MNIST_2019-02-23T00:14:49.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ad7097f9b501fbdb9a77a127974d404ee8fef1b0 GIT binary patch literal 17210 zcmd^nc{r8Z7e65!G7pLBAXCYiaZH)#nNTS6JRUQVWJn>IGMCJ{m8p`HA~KI5DW!zW zLxoCy`#tL3$n$+XzvuVg&wY0LJ?q`WT5GSp*Jtf_@5`sFsC*P9f`#$@^BR7$8is-+ z;qF#WFexcGLf6vX)(egX6n!|tFu=nWj!>}lwsdi~hs(&oY~5_2g5rNvQ1bUyG4QtZ zwuNIiDk;0WdBZUq&$^c0-nIlcI1c&=L+BFRtqp9w;ijM;MP<;wt-m)Mq2>zul>e10 z{L0nf=5U08gQbn5n>}1~qq>QXm6NTtH(ZR^_8 zG9li6Gq3FE0`vt(D7yfKD%x7R+W`G*+Pc|$JHS!UFzG`Ty}SvwmaZ`W%)2I9?wn!_ zkLK>cyEK9X+egOdQ?a+YS%er=ROyxI!){cmxfYi7lCLcXzi?{Kxtk*DYF&TfYqaDw z_ec!)7hkwfaZKQ)TAfMbFPuMv^HsAx%n^|O)dw=YUJ3AX&inHnycL_CYCz&rUHCR? zioOVed+UhJ=5VLA=WhX&|4TbLN8A=EF7BfkVhn3 z)4V<*(Xhw72ydIU$JEZfH%}s4S4)wqs54XI&8|~LnY7NZ81_U;N#&VONndYt`xl%F zP`Wzl>%hWqOBonulItduFNtQF%ALv;d6)l6?!jJt*mU{bpqbUAuUuEm=jtC-iTM|Q zjkzd$9L-0Nx+m#s=Ugw_aX;Kw;}ow+1WHceRq#pkx!J_UZ@HI;hQ53>x+RuT>o$;4 zD{x}2uMKBupiFYZ*XWH~A7WB@M(f^WG0*<4$yk03hvx1Ar9enF@kA3`d-|sxYg4(lW!nHTmAl`k~g5HQo28ne?)93C1`|v4P3yG(DZ+zC=c`>gEp! zrqmmBnK$G#&}Y6?-M;_}47)o!I?{6`wyar^bIq~4!@>AAS=|%v%f}98-#n|qcDTF4 zMOU4LDa^4eg0_zA^kp=;6*J}eUDHBgj^t{}m4O%I%U)H*H!pKpHd38)(bu^`DE1N;hK8cN?5rpAquUM7Fa*b%xYaGAGjus$u-qW4p;s`WheKkq2& z&ifWUw81ngdal$WcQqtW%ecKGb84g1lxg&P%f%N(pKe!8S#<8^VM_ybX>qoMoa zJb0_goXG2&%m$>>E$K5}S3hT>yv$JW?f#`I>4XDSPlT*>zr^We?=}$r7PE)%6pgkd zty6dtgI4n~JM}^3n&m-T$K|G^gkZ1IBcE5Z-kca4V92v|OM4LFkoI)7^W}x=UadCN zC&`5AW$wv)Ijw=VLvO^VDjL6xDRTrMa$ol#e%1so$w_}-ea?PU)TzqTWUpW2VHPEw_{&W>-XyC&drx|@pdv%=ebtA8z23;( z&og!9)en7!_`?l(#M zt2#U%-ZmmUo^m-VcPTRV(o)VHlBu;(a=H{brUBomGYbKBvO%w$M=S4>s`@&R1=N+s zPo?H6wJOo$xU?FxW7zC2746?Ets){s(!xeV0t+pBnv<-^rIkqMBHsM4JlM$Pb7jp* z#&EGn2W7rQ@wVm;!lFI~*%97aSaT{?QAMRWXX@}L4c}Y@uFkDzs&WY#`-+2s&r*3x z$@1Nke_EC+_3@!b!yKxZ?5=aQSmtDY?eIHJvpnXpNotOdH6KhZ4J{2$pKK2pxZk*T zXz*+Le&h11QyF5$##2F;x)pM-NyvP|%~~I5bjACYH?BwiyiAs15k!!`X`xn6kwX%= z*zfzAEY-kb`S!?c#`PmBgB+BM3FL6O% z$+TVCxY>Htq*d_Jb!l!7a3usLEdm! zxQR+QVCszD>F}5zRvtCR;yE!YLQN03)60J-y|qkiEqCEj4$b(HGGbY9`>ZW#64B7+8P@SJL?+eb#TPYSf~wFZIrwFBjMu-p+K3duQ)^gW*Yq` zmiS-p7>z-1W>8O(K!Ku~3Wk}(F=Ou6>@j!LXWuH9CfZq&Qd5shzr675>fWCHA1}I} z$g<-I5x7*dazZGxhF$NRp^^#r%)1jEPLWT&cn#hW_+?t#1bVHW?<$EF?}*E^LsFq) zryTJ6?b0cwxd$kD-KMdv$7vlOnU)MJpZsZ{wAxgi@v&bh<7Bbv=NRee){d#-qcb$Q zaqkP@JtNjp%kb0BG8>cxdfG!UA4?OkHD1#KwS#F0)@iOzT*NxmZO=r$cgAay6eNqoyd z#Xe;vL6K9;70Xj|tA4I<*XddJQJaoH(VtezIe$w~LO>JkU)qpa$@$7B|RMkq4*CqGPhukJfW8tgUMrPJXGF3RLx8V#zzdPbgY?Df);~p{s8&?P_upnQe zjtnLgXRZx}9&_wckkGE87Nza%o~i3NCnh(mgeVnp4~21GI>fR1wWHyNQCh>D0;S1t ztXig+-h}og^>MNfpXvN%s(tncZo?h~)OW%jz_fp&OEV9?#QSlGlqbL3VlD1$#P-(izdQpK?&Yb8(_mZjLf-l0AJJ zmY@J(CoG}Sn^mS1uMtPa&7iQHS*LRSr$=^N`Q+=fk|Gk_E~R{e1DFS7%s~rHnn`Xo zHdT^stk*_3kX|e5sV>hnC$!CYlWwz&7+#}_YaOonbUivqu_HH|;zVD^9w$=!gt4%h zFp=>XRWhI ztToip%24*sWuMUm*D%#XV?P0ny&<)WNIvFc^l!fDoiP{7ono;UFy?pe446>+boxt6 zE`5Wi&;)zgOxvO5(}e~J#Rft4bB=3;RnNcE$GaKfx6u_84edl%7|fO>R9_>(RE}z> z)?i}SZKt2@0h#H2qd$kJrvJHqj*TgvO}y!u2CZH!q2XXa*n-5l+NqC@KV*g@UW66$ ztUbBw$T{~s9j8}{wGwKR`MM^^{oLTgTZV}Ts?2saXabz$!uPu}XZtfW@jrIwIeL8) zR10J{`tr>_-!muT<$4nLrA&`T`5Y~+rqW8~bmrl}-$mm(pJ$Jtv-k4K^$Ek(u2t!< z#;9TrNZKC_h#&DUVMl~KiXA{-F1~u40)3Bq%tA7ax=@gZq4xTr2hk55CI#c{8qey*;~RHQgXJtExC6vq+Rf)7wdY2E8-pWq01% z^IN_iE3ME+@9FULKxDKr9twJcvSdCGmAX9381t-V)!c>A>_OWD{@%-Lg0EB$vJ8xy zX{Fr~964Ar8qGSevi@VZv-4QISa8`kLjy&oI~f{L?53H8!C(!Mx?Q+kOm>m{f3JmY znr)%N)&>TLQb^-P(YW)y3>?=hdwj&1#WOkW8pY(kcXHg|RoqtR#*u*$5;r|D2IQHyeejMUx03eA3Db zq~MPH)a%_NBUX|h{a$9bw>`P5h+u9N>dZOv?6uNrd0K3BxkN)rJu=Jmb5Z7{NUg;5 z@xvM%_i3{0??#WO1V5t}!xmBc#A?*-dDG``0@m)~vo85aWzY{N#hzg}`GWm2yB_P< zD!0y!s88ze&qxV!n*|}2*_cKRx@F@k%x*27lK)C~e_wf>oRivkg@DvcpFkZEH}-Z0 zdYcD^E82R~GhLX+)9TdHO!r1iW`iZu%RcSP5T6d2Q|@nxx$VFm(qt?!#b@!_fM1Ep zM*E(L+oitj*&M+FuShQr_~hWlNc}M@;tpNZyOoK9|#(S--QthuXrzK_j!Bk&~0r zKl*8>+-Rd|e@yyHMzi_CLn+n+)c3;#^vF*|1`Se+xbMBXCf;^t{?ti);yxWToPF@~ zyke8ZRoRIr)B9dnW;hCbiwy6KQ^j$f&_1ovrE!x%P-iftkVd`o)@0{Qg-GR$vyS-S zOtC<+4m{hmeAm@jV@)<|j?ao8MH)WKl~hDzhcDhZLs!09#LZ-WBlDcVZpvI)$#xu&ympRx|WxDb+J810(S%IO!wQbb3QzWz13E^}k z%gMNDr|=OX@fcc0ctD>krzL!t6Jd}x>s>&$*&`@js5!3(l;hx7s4&u{NLj#L~~GDM-ehHG|}9;$jsFF3*j@OK6Wqk6V>Ah}6zm20tx8-KgU-rqJoK z(M^$_g2cCEbUy3MkqsX$p2Ki#6e(Y(LfHp;~Em zWseB%lgAQHGNoR4qCs1ho^0k94u&-U$XCj46TTiD zHT9Sn{4Vx}lO6@VX`1kAKfx8@`gV$a8lnr|*+7pI%yQ zUtxTe)OyUdg`Z?`*G~|4O*cnIn4JPd41P0qmDbmA13|`6r%;t_{=60;yLfo^%~{io zHp3Dag6tkD=_+k8-?7Pr3mDCiF9h@VdW(--ob1QAXGKH{R!j(|t0P|$y3?)7bhw%k zO_hcwIYw`C6&4}{ndwQ6zB@-T@|bIVfxYPIKxqV_KFDU5h58FO*+zwK4%HKKjF#W8 zuukOZ-fue6cVAwH}^!R77bpH3QtZb+26AelQ(br)I8c!*VJ@~ z+~2N?VQ~0LEF)&7o5S-2qw_K8Pq}*NyLue-TJ~oa-k|lDxbHh>^tQJEDnvS zrH*^>j${wX(V#wi-bvAMSxyx)x-w58*+NS8Xl*zlwlnCRaE#f|w5Gs(w__m}^R34R ztk%vSbJ55!dwwKJiKVDUkSb(Aj@k2!NyD;DuA*NhX+JM>p93EQEmK{`406%GF<@|* zwQ}sUqC>9YffsR%(t_m;A2V4?3xE96UdEM8&mZJoc{!=Attg!Jg>PWdy-w@>FZ{B< zfBQiyLXyL=&2-o)q();m<5w-chpviXI=p2udq*c=XC~g|i2ap{6my(#@kUj*UHNHe`9sT9T2f%WgHY=0IM<=8fPv(n9(6 zvMsGw$MU;ehN~9*Sc`w$)%e-_YA`l+(K&JTav6qu{j^wwZ?m>znr4i|$|Wk`F8+MX z?;A-J$p_iv+bwcq%97(72nUK_2^1$S20n4>T?#vr)KBewqyJfT+q;|NUwy?C5R31M zs{Mm$r!-3ICh~)X&)k+j*!Edd(oc=|(R=|jqa8hkep=R*WC>}j;QO-c*KCihd~fbt zTBBhVG&{VF{&tH0@tYl$R;(P;6F3#wTn&$ou431^cVN%@>6#cx(0#J=6yxjN6=Q^o2bykQ zYG+MpvIitg%UmgCsZ+d$Eq0SUtEYZN^6d4iVKyPbgU72cs@2_EG!u~2z1LOZT_YCC zLAe)iQJ-*&Zot8vq+W*p^9|*OudLULgyBCs2JM~>qncGq1;%!3OM6N*T=7s7Z`>t& z@z#5m>V^F;-y_&>Ur3*)_K~pFs;SF<_5QhhBGsI}=dO7kPB-)YN_Sm!#;Gfh9ZtYl zLI)$u)!kpZ_IX}pg{b7q47*?|kCjUbGhxWLD6OgC_&;M&~t!_82&s?bD7aattmGc6#@ab4;Tz`dEACI9b$l4(Dwc+bIa& z;;*+=QVN5S2)m{*W!tXq6b~3r_HL4E!L)b(?i9OY0E*j!60T?@kin@Gm$SooLw}4) zrKKMytj}OmHAQ`|7OjP=^A<1(kFx|I*P0gFZSH-#%XKp=cGpE7mbTHupH0s7WeL4+ zyQ_HjOBtH*`qTdN@Ar(~&NM8q(rc6`kzf@s7;pJjGW(-SxaBc#6q>@NA5SOh@$sHz z40CR~kzdqz#rVR+;@tPX@h&Up6B{45^s!cHi+-Qv+e;Z1l0&VzsCC^ZQuf1`P#}T& z>w@RQCE3#ZHPm@7RHID{F?4bdwQryGocb}mE>WK~{)*N(&Tm0d)A`{C1giPsgHWYe zmKRIX?6;<9_s=z_SrdAL6~@gG z+avNS^D4W~WP4Yu&(+@I)~X0CER)^sCqQ0&mx8&Q0^zDrO(SDU-O=bjA?k)>8YTA$ zHOl+okfa~{pusWOd%;1}>j=#)=KXebfl0X>*BK;Ohi7h#5(;KJR#iN{oc^LxHk;Z0 z?1{}sx^TNMLbGQw_UoCyu{qmg?bh#nmi22%Y2`opAH#<_%rV!=XN$RJ)+ma-B*M3m zbf>ry12$?mZBFSu^lU)VPT?rn=d)Ilv6sE>v<%hmk(Fe%+(#~3Qe&h3z-%(%czNBZ zO4KBa)~bv}!4JAG@gF7*402Z+^rr;vrD}a9l^lfEcaiB z&D&nEQE@7}(`qZU`0xw$-f-5j3py;(5##6R?06Og$gG0C4q6!-BT5)K`qiQ+^~rl& z+?bG9rSi%1oG*=^=dG+u$xy^t#~{eIX9J}m(Ej06UBL%C4R!} zH9C=(SrY?chhH?1K6Md^ebO|dpcH-GDE#!5e)d?N=_h*P>NtrcA5V6f3cSQ?S+&B$ zsiQ-INd?l>-}UV06^3*q`GvBv3c<{aE!P$L82b--o_x5dcPK&BGfyX0`{>)*(qIc) zj~O*THl~2?>=Uz+qmPfCwN9El*bu6)%_@!EFnt~cuv2U_GudP zB_3LJt=;Rl2+R7f=cKvZZ(K<7p1-~N@pEhE(Hd>(IK}Ems~4R;3nXBQ9?DLO7riYT z%9|AO`p9w8WX^XsJO`Ej z{YGO!9>Q;TO)$CQEGJz3wTDi1P?ZD|+F;l+GKxH#RyfjNVRu@t1+Ge2KQ?PRx76*l z&$+QcgPUKLDG%%T`g%&Yw{mR9*G@4Q8Y8-CanRD$a8u-_9rC(p_D<~(H))4L-|_LE zLuK`bGlwJho>is)RDS%xl!28`^z>rB*3}kt0Bg}louPF-+FfqzT}^V8_hYiG#+dpW z>JCYL#PV_q3*y167{5gJUEiJjAmT>x$AlmIU+qFt+>7z~M5}(TgyiYl z!-c`Mq=#b74YnD~ouZ?y-)BTC0Xjlrk(3F_k+LaQqUA4gv#XLkYx@{RJr;Umx9q+f z;*_`fqmJw$X`I-@zfH$Gg+eIY=9ow+{sA<}6upkiQ8SfNO_n|H_FFGzUsbPkCpj8* zhtHtopa4(7>j8()$N;))T6{*rFZu5!?jvxr$*H4K{k2gybMNP*MZK7+Dk`b+u}kiC zY;9>cyX0VuNtCX`O$y9wO{`p#sw|!`y?%!#DOmQdc$S(jD)^4ox!g?Ovpku5-^p;O zqzUnQTDifdmpMr69vfFRexP_mDi*Mflsm;$m@VH6kLu&&4xwmU*I*$QBvXqE*#|UV zd|&rgJ5E7S;?gQLN~l-l3lWYV(sO=4plo-&ZvM{Awum{^p(cq7&P_^Y%_(*Eio&C) zXHw6?=7v@{4y^vu)?7i^h!|YvFq$r=Zk2DmgsH4Q99Mt1smx-ErzFVVAg{jo(8~{( z)@2&J*u&T!^Ey01td8I0^?#hF^h(-psM3@^#Ze`hCFT5#V@K<##+0z}^AD?V5GhN_ zI~j-GK>9TXh{T~?Z55p@?a${mHhdj>c{!ile4^1dPkh&E+Tl7f_4GEE8}Oe^pSTOr z3T~@l-&eAbem-fXk6@9Z^G9ZOHUt4K~Bj_Iao~#d( zgU)FY%J=b4kMNxl^$jE}K1x49-(0M!%Gz5E)6Q*T(Ri7%l72D0=SA!M+pEhQXV*zF z@6V9_Jt(-H;!e@cz6*m9H2@I-au-uULF6|3@WO59gn4P=Q@BJFXsnU0RxWuKc2>J> zPQX#{>K+op3e%Nsc-bj-!=N@#0~`n(!88D$Ydp?%q}B#!dzU-S*4_#iI8u64R$c^g zqDLd-1>aD9{iyN5Wxg_bnaeZjZTbz%Y!xh>NAIX~GTuK5BiCb=UO@}6^1SGLF(Kh^ zc~yq&4uiH3n?GeD$?iKNr~Q+(&p(mmL=Q72UfIWR=xMsfB`wt?X|*3mK7Pqd#(IbF zAMsiZA{4VYfe)8s>G_`+dJw!ZUP*|_3xyUI9gn|lA{I?d>a$a*H-jNoMI zlB<_{UsLlIsA&1l)Rmix&T%W|u406fo<0{mL>i@SeF)Z58p?PM|ETohqcIvh+mj2n zS?R*Gt`2+;1LGo|bY)iLTQ&}44=nwpR5A_9-DVVaiYvDa0tQ+IL+inJ5R!dKyQa5k zc*i&ivjv)8Zrwx9O{*|xIKs5ZAyXXHm;LhRohf9xocxL$kK_W9>Yt@W(XNL1?i=-D zKSld)S|xk5Q7I8vzzk+2)YD-fJ`ab8#J+1TqxFj3iI`1pB zoBm#*?MQE)am(+~@yBQ74Ukv2k!7c_=?KX5U68orWn?8eL3}V@q~iwdFp3G*P5>M$I*vb8)#D7R{Vs%=apv_KNPR zRD@xzeQV9*{!B)=MlvihG2uBM%}?5^!y;Vc*3QQ+^ZH2#S{#oFXCiH7Va|0vA@Q(2 zJ@$uG0qL>#jD*`J_qnWDRrgeTzG9tISPE#`2e(U2xd<~iie?LuTez-rD|p$&&;;H+ z!=>gC6RUeA)c4#gRa$%n+ruKnw-ZUJ)ZR0fcxG=rVNz?$v@)0YMla(nTvRLiq4iFj z`Zv`h&YG|Ma@8E$coJ`4RdN?|50hvA6NUZ~+VL$gyzhTa0lkLuwQ$mFP6$5{jAy?W=$TCXgxe}c7; zo{v<3OC0soZPZ~%IWoN1bi8t8E^Y5qzo$8GTYNa#@C>Y~J$ntcip*;cJfigFvfH(k zhZ8=}F-3OF8YUL#>95&1IOD^CBM%yI*0Iw?A7S@wenw&7^Rg+eUZ)_2X2z^@Ptya& ze%rly8OemT{KXi#c=FtvdbzP&4I!Ued9y5!^E%OfH`5pz@M7-Rhm~Zz zDTU8hG0tplZlF0rxmMHuM9Fa9O(_N@kJ=%Hwan>JvEiNv6%C(PPI;BrAX!Zo#_n>) zYu#NDL%d&+{xG%rgRCgulx-UY?i4$s#5T`Ut#~aud2WXD%b7RWkvU6bmzNyO!2X@@ z{ObF#&l=nch4p0nRo*MBM0QO0mo}s5YG}~>r(&0`#Y`QFqn>1SeOg^9IomFb5)fwQ zwQx>)ouR_AGLlLo7dE|E&%ReN&pFi{cg-g;B;;YQ`HOs?cIlEvE;?%U)@^ z*GwIA>HM{ZLL3?5N%V&7SIP&eBZ~5^t{NP7hAuae)lk$I=kPi*s7 z23*YVgC1Q}q9zt|i5f~^Px_2D8Dc;E)f-Pw?x_yr_3$bF(amfAKuGDTbw}8UXUS<* zl(KXY?}y>%IuA~JurUR1!_STp;g*vN(wF7v$WUOLHz48|gOIPh$#BJS0MAcrq`x`U zMHPc$lUHyra-z!!+w=TVzD801ywJsHb-JcB3%dNgp8fV$0KP4 z2}Z$PW6ITh4q+&Jei-7;V34BCe3nd7#>_JHhzgscvD%P;4EJ7_R)Cp)Ozpn z;nSzzio|Rq(oW$Y8XQvJv|pjyx(9gohxRbGB9>j}Y_g3W`K&v;c1cdkc?!{HH?M#4IKpWB1{_N5Tq3 zlI71e@Xe>4@d=uEti$2j>SXH;mmpmwTe(=w@)x)!fZ`hGp6e)i~SPQp@4Hs>m5 z=FGfgh1-aLlP^yR-sQ{${kM|Y*etlS`7j`t-DWtcxkHyfDSn!-W z5juTa5wFn`mw(N@=J_(cEQvwo->Lzg^uNOp@|Iq<#ESvo)V-6Xk&gj5Y!6Q1J6IB+ zI{{vB6l5CUguId)IP(r2`InM{Aru|$>};U}`QQY-DIDNu1UR5?Z41ZX;0QM#S1VhB zm!rKK94iV(c-Rsg-EH7t@k0dK65QbkcQnn!LuDWiVs8BIr@TUFGqjy zkFBq*8ysQlXzu_nA-Fk$3ZkH4Ik>18;0!K?0XGrg;0P2#11<(AT5yCm92_M>fO`?p zbpe<-=++312d^yQcsv|o4M*63=HMtKsA&%}P}LD+pnx+R;Q~jvf?fb)fLU;s9}7o- zbN)aFpl=@-!WWK0qu_`DxcTo}0{?0PSPeqik>KSGX$xox0qIeG<1WHc6cSWBiWSAc zQDS&3Xasy(1osf2B8s+N)&xfnZ+8M51ucF+yAWB+(whLp2lp66kVpudg8%+~0#!9F zfAxe0d$I^^OIM(;|Hcf4Fz~VRCSD7HE;9g0H|{iSjIFVwjkg0BOvqX3L+1YbB@2m0-W$V?bFb zL2dj_H~?i)NJs(VSdc(NcyTaCz;7@|aiDWCDB;j>G#&>73Ct@|I1WVl;=oSDz#J75 z2MmB|Ld>D}U`C6A$=q(Hy}P$&%Q69?)LmGYZ8C?{GP)Dsr^frL~9u?>j^_kSOdf+vJct8zwJY%Aeo591X_ZA z|Jw@06cPy|zWdbxUK9`ioghQlumQ*pHYcL4fn7j0@kb)s17s7w6VX82DPCyBw2L!SZD?&~Ya;L-}_F#e7cyfdTV<*y_;a~{B6AU;o%&*>Dfvs%31-|!J zZ$$SAR#jr9KYjBr>D=MOJ$KL!6-gb12m!k`Z z(<&`XDglg3qt(9qsS%5ijcO^04;4RK`;9qJP3aH{+A&aN@CVr6;(Rk8#o62@hgZ;5QRO z?T&g&gBYq`XUsnJ;>p1qu_4wc71p-FxPKbO|G!%J)Aawp)dDnceoqJmcOL*`!KBu3 zw1Hw5;tgee2$^q~_r@*j|7H^aaeq_>w|l)E-Q5(SOVxsk5@;j}k3^#}0Ip-P_>)2~ zgaT9o;CewzyN{3i@|@b>nQKp_5mA>vN32US3rW#eNF?iT-9$HT@B zZe?lh3}E_3eW5>QxIe+x4hH#G7=U`e{s5B00e4{s|Ca`Z7SJEu?H3Jv%|QZHY^GsF zp@n`c4WP5FG;zpTZYd`YIhn0A(XD-e0w8?bTn>rE00gp?hDQEHL!%&H0{#6b2Nu7L zY^c9^#$h3MxVbI{1+AT%X=wa5GzigdDThUgZq*eC^1v3&=5k07>ita_chd}~_x0Qwl2yiP6f~s3-P&BuNhTUd7uxJntZ!L!d$Z;zTT&Ukl1Ks{j z!+|J!OI;iu!0atFQS@JWMuLOkf9U`OGh4?EfOZh6Y-vvn1>pzi?_YaCilM=l!B(0m z6k=~FCk_Fw%`|Z|1nRcXz-0f6CI&&YE#<_)Rmm+hJPPbfY^6c5!d4m%!dcMYe{$gQ z5PsT31Hl6RZyHwoZyN3|I|WgX*jAoVsK3V&g~I(cFHtBcirm5rN*w=Jdti5MtGr+x z`g4u&CRjSU*b<2Uj|(+7J9iL25D^VR-`yQrsfiHKz%dYP0{z^>%VgCmy6tWor literal 0 HcmV?d00001 diff --git a/examples/variational_continual_learning/vcl_Split MNIST_2019-02-23T00:23:14.pdf b/examples/variational_continual_learning/vcl_Split MNIST_2019-02-23T00:23:14.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8063e13ed341d87b1b027ec21d85c438e4baa271 GIT binary patch literal 15643 zcmd^mc|2Cn7eAqR$i8H&i-^LrKRot*-$@E(Un2WnR1y)AH4@pER$GJ?TiKJcwdj*< zSrRE)Rp;}C+^UqRQZ5oibn zx!W8?$jCrQ18YZnZwLb@Mi9~@(8C@=DqH(lySh6Hrko+}wS8dd3#uU*}K^Wu8w@AKIjOkYj5Xlt>hj6 z+J!sAVu|9CSP49Y!4SmJl9HeVx-)>eeO8rcWbf_n>tzeH39Ac?enp$Gg8xz_EZM5) zYR;}eTM$ys6{u6i-qzg?XkW+P&C$mRLc^0}1T%X3c-dPYLj&`^5Hh`D2$TY$p7ImJII^$IlhM*ga2h%^Hjjt9n>>$jkFuAMXMO@pLJp4r3Hm zl>ru=b$7%mM&G*NYt`f7D_?pqIpvLR74M-7WTGFtB;%%AyhKxw&=Y$jW%y{nQ4^DH zs4{g%)wij!eRT?50!p@m1-9Z7+3L>Gnd0t``m$f0{NWkw?|aYmz_lKE{{z>S{YnR~ zZDSzs&l9FRcJTF$9`^~;V`2`UbuNB!dS4Z~bBB>a3%!PV|4)@4g8ce4gI=Z_V-r*F zzrDNM^KIxy78K} z(tnUE6YmgTkbOo!BP1-%LpX0d;Y_jdW2Wt1;*PdfbIt>%5q4enh^59^jt1J#fj(Kx zGwIPC=bTj}WBdmg)6Aq=nLJLnpQ+?p=sH@!k%y+lZ#^zL&9pm?z@F#6KppzEZkty+ zmtxSq{f;BaY>zMSY~a z73LLRATPgE#T>{jynC16NUb8PPPT^Q|jvrx*x~glw>(N|TFILN2lxL`=H7Y5oBRLG}+g=f%P~g3z ztVJ@nLecHG6W&CWPfl)Tc=};jK`P%t8n(i_(=A7}&zN>kZOz$hsq)%pw+%UVYyRtj zVy#fa)*BubC!b~Y7-n62WGkL_t+b(c*46ch;A2}_7n*}O6^nER*@l736I88&(U)nH z%OojIevY*N&!Feg?(2#thoieB$+3rG6f<*YMF}^W-6$!(vHRhoi+hQ5V}c@1bcs6A zSr0jD0{3ol{mgWuWInO$bkD5jLwbkp$NQxObF*)8`w;@16`j9`*h$b7_6ptZwbwyw zEmGb7)}u5NMioHY)s8Z5E!+7nSJzIuh)j|D$2@z$b)QC}l(X2wh!7fsM+y(c4(k@5 zb1l2trc`EXcm1oNw{*jq`*iL77dvad5RUCDF*S&m()ZW)|NeR6ONWDZMWrAw*Vdj{ zYlG+89ip8dyl>6VKH?Fb@R|I>{u?oBY-}eQ9P|P{Sth@t+`$;~l0*F?SCf-QGWqRv zmb*J&1@U0aGD6$VSNAHj2-+Nkc#Z`ud zpLYzXq>9|KYa&9lZww!sySDSDi_oYuyZ_7x&M1XGQ~}=!F!)M4piP6 ze9a>oe%5xhr23-;*U1;i=&dI|r<(SjZ>vvJyS0^;e@kP7IkT`N?PI!R{RayfWMAr% zS)-I5-m$wowajbAI}^3lBj3#RqS531mHqr;9$d8|nboD_>Cwo_up^gdi;sxSA2#c9 z66z!3Z_!mb?7*89%{!o5fvXLc3;R81Tyr{Q?9e?*zLsGyzXsMG5H5`Zh_)HGT~rU9irFc@%@5EdX2}`i4!KL+(eQpy zYlr1vTf)8yK07^n$_}SF%=dJ}YviCvkX*_O?vQ_l#{3N%{+I8?U@>b2G?ezDLQ_u$ zBP=28sC%X(_LkO%H)^*N9IVM{XvSpw&o!p(=;j@XazA**fiq;!`P#*UBI&gphNn$b z&G@GOJJ@+Nyv|#|_&+ZpxwiH_&ukuVDUOuvj7@hyQKMrfort^+Y1?G^`nCzUP2t-1 z(>dQWFYa47^wU`NYjgGGkzSF@hlUUCe=ON z9U<6}+wnNc0;-P!=){SExA^+TC()Gp1JB+ECx-XQIaS~HM~*jlgz1d1w`w#cBt5!> zHhI9*vHSCqd{>#=$uyLS`UVR78z}xSh2e;6^o1?O?ME5hE+=CSPaZ{#swE>t!Yvf! z&Ly=`v9ytw(VfDXIn(Q?(jVX=%Tu8n-k@U`k8TDfNv@lym^wBxG$qwckrEA`#(bH( ze)qDk=G0j&Za1ZubHDHnIY)CQtSTMB{uHxgvTCv0`@H*;8a}hc z(Xg+XZ>N{Y($zVuH;@blx|@G5Kc10|54KXo-(0fyJYM#cLyH`O=bC|GkoC(J7aMr+Ij^E~C`vw(i)r z`LSHxPfCVy#i%YHhhR^qVR}<~j>Aq`x1z?MD9&t4o#05@KqUAquo;msn6)ZXjnj^$ z;A2!?NUvAF`qLvLwshjvNojGZr>?gJh5N8~DOirrHtQt1)!J1_x3guwa5Jgvn;C+B-rX%%KEXx8^W3d5Vm4luWnpUN}-< z9A9L7+;PTvsi5ldcZN7OQ_{Yt|!9f9d- zJ;Og=QcqpKdYYX%j$N|3QJc;%#;a*pVCbyW=?9Y|&OhV^&pr<=;9q)J;mkerI1O)j z8)qZZF85_gnD4RiyElyEcQsfX>@Z#s=eh3{awmH)>kxlz&2{$vCaf96DAxa)%kRX& zIK}P+uH>oV2w$<=)zrF|xLx=;i4_?9lgAlvF&R4q6njJ=&CDu&wrCCPPH9K6z__;o z#T>|xdog{O3q>jWsW5kFMy;e%X$pk-86RBTeK)ekX+k*G;ZfSlv*Gdl3$JC<)90%n zq^nRJrgkfcBb9OU!$#Ac1B!U_`nq1So_t|mC536`(2@D7nX!yXzXI`8?(?&y&?Q|SMhKp zTi@dHkHIHTU6|p*`Q(+4r<3K5)GD!7S3e)Cd|kUq}i;? z$D-D%MK)eKz5zw2eEdo%M|1RI!tf`%JxL2cr{yp$rl&L`$%K)&MsySA+N2cF5NMGEIql1yb?ykwAp5fGV z7xw;?7L6?PowsHmf~C_+-g8}+oC=vy>urs`>BJY(d|=O{pw%m5AysBOy*p-Z=X)|f zWC`bchkJ8E69ZANZf>T4$hs$r!;j2+ zqtg~Iw^+{B$gu6CsSMp?NO?H?_yCQ#`;L?)$@UW;4<8~XaOqav|8miCZ0@}iI-2i=t~Yv7wt*Xhce7d zx1_`z&|$aboL3nUZ<<#uE<2kMHkW;ZzVvG$AG2k4`sqDex8*2E-yXY@8F}FBRbSuC zJ26>YBE0&`1RP$If0#uueW1I_eDP<-@ueRW`6kAh8>nluNM^kg!s|dq zaYi4rdQz5Zf?ROVv@6}t{8M9YUPWE#@Mtsj^?}(t?40K~7uk(xZ^=$KgiXj+MNp3n zDP6MOosOTZx?J$C>4D`4wzsZk`+b{a4$3>IgwIJ$scn9BW?Xwb^-8&I`&3+QEhpcz z_A==RQ}A7xMhQ0au!#hh=0p>v^oAqHY>J!2AIw+>*X5(L>v;}P>37)~B+JeSUFGGN zf7285MiY1b@D|KPE9)z}M>`MdE%vymKQ=tikKU8ZIgDZIPQPh`$f9YIl1A)hqe_|Z zZhbAc>||-KyMeAY3%)S8wID*(pe9BcO*bh^#X)VgBzio2P-5{_p8agBO$M_$yv!^}IhIXqXz)QxI1rvmBi{ z`;#K}>Os;2(zPxeyIFvU!L5Zmx`s8!RDky0uv+{_Ht7%35|GB>%qF zZEr5v(|LC;{FlR+VkLyLy6&zi#L-iWwX2WJifvH5Y_h{&msRv8TL-pz+Ek)cstj@s z5M&v8M5aPHz!BGBl@nc(6xZanvk(zab;zpkJ-6Zc(0z%$G(OqAjn(b{T^sx2C!vg- z`>(J%Aee4a`*!_!-f__rHob1@LPl$i2xKgxpk#fR+HpBPP@E8)+MS5llQo%KA^t@-_nXAs%7rJ${^ z8M=A+%{Gv5vw(IzNK-Y^_G&^=zZ_$=0LMtjcb{tY4!mEgO!AO=Ei+F?O@ooO*T{cG zNVW?b=9$YoV0W%3E>kU5rIGQCNY=xfb9ZZ#8!ejMBDH3-c;!>_BrqK}yNWB-3kc`c z@cRVDAmy+i)A9BXhA-_JCjI!{OBu6Ac!(}5Q(7lZ5~!n^iYM`_mIn6ua;ZE&Zn z*r!(F6z8@bM5~7Q8}xln?qm&e4D;D)O<&lq7K%bYI|m%<2GK1VxA%;0)syvm>eo^FIC29T=f zn?;c8w)YK{l+0)<&Y>f^t$oYWWDf*?##XY-V7iJ8I^8T##NgLCm5bW(6cDw_LPn^- zsUO2KscHMYmZx!-bO?Sag-c;t0{P6MW2}LwrRKQ~yF2eIc&=TE*%HOi+CIE@-t2VG z6_KI#3YCh_B^a+)?|IJ*Z6CXtZcvPL(<|2K-8|_z`D1Wds^QAm3%Ucb{ccvxP*6UfY@|eZ#w%3aJPk!v0TwpMWaya*|9&ww+eQ3g3vjV4S z<_3rkoTio3KdNs%k>OLVHS^#GpKjTyf)a(T{(C5kDyUeVQX!9NSJTRw({w%x7$>;l znTIKTPnqVvb4oM{zT4!SU)!DRwsq@uImz~=_;&gAypgb%otD_Elpl(CrkALS zyrsf680lsq8ydHEG3&+=*(v#`RlN1)3S(@*Ljvtj@>PriW zWZ;y8S>{@=D)%t;?)E%XGiSIvp5U3QAEPJs=ELn^D|?S=O@DUgz^54pKS&SX7dvU2 zIJ2wil=23f18&n83%%~#WKhGT3?4z?E?Wn79AF@zSox0svvnKmKK zV=qhf)mIJJZ`+UgP?$#SEHQ_=a;^IAKwgg-cF@(OvYh9q)6(6#e8X$O=+%rYk9+pH zM4yj0zmCkeJrS$blZ{oWzV|iC#WPy?Lo z_3Pkr4DU<#@0>KY z@r|6C%hOG1#RReyj_AK!Hl*9)w%paMSY8=@#b%Vbx2b-&%m_|^drzO}t_#xo+zw|S zHO9QVBF<@*$xeXJ>^(x>Ti8;1uo=<}=j#m4Ii84!DQpeg$5zjf zkj3;lyyxoHq`POci$>yq@V?lBqPi39`<_ngbTL`oo4p0W56E}NSQ>9InVZE+>z^VM z%7Ifx;ZWP+)xs5$FGeaw@o{L7HMWn0(u|%uxK)8GTXNe?p@@Ck$sUbw7uulX&7uJ` zek~MNjk^mkG8OOriZsl(sV7PvKlNWO;`pjn?oK8aaZAv+c-J2O{8xQW^Qb`jOkF`! z(SD&j30z*>?220GO96W5YdMu!sS(d7s|t&&d>xXWIk&Yoo&4l<0Gl9NkDu7{QFnYX zQ>MIV-2Cb-+QeXm3dt*)2I$~hHm7sa{Z8_y@AyxSQ$1Bgz|+PJF}1)+=5YT&)uVS* zugN6>HyGt+aRzqXTMaQIV(e}-UE2~O#ENWkZZ>15&hzifKAQWfsES?NWQM&OR0Kmr z<6auN4E3owT&@3j>stHS8I70CQs-QnRV`YQ>m5}@htZ8PjiECy7ddx+z24qZw(Sve zV1d(gs)(jd>Ct&?dBfh=hP}-tR+Ie2#{-OW8;W-KzdOGy*W}F+%6?zK=^^s#*foKG z`?;zwWF20Xn=>Rkt0%E0pP6>$eVC|f&PyOtF33IJOIxv|fPfvA`^Y|Ug?N{DS47jA8RjVY{?Grpf@C)*q zyO(y5p`}PegY8*0LNBM8Rl7fVF)b>s`+3{PHz^C8Czr{wLnp{Ln3T<8Ec9B-!@@Ry zssTKkBjZZkGPOa&o5fLR>^lE{p=~=QAD!}y$y??*PPw9po{avVwg%882whDdpJq2m{3P^TvrQ7+RUKqx%J4oeqIQnWz!_!Oc=n(Uud;RLrPk(TOeOc* zJGy7CyC#|3IO8X?mElf-{Wa>4qGQ$*qO)}M`wv$=-Yy@feZ&9W{`)7Dj8Q57js?G2 zEVKR~g*PJJe62u7dLMVnuOoeoswm{coAAUV2<39qsI*x2B?lee+5aXgr#XD zdx+xfRrMRe3uY!}(9>xiO^@gpgNvv9PQTEgBbKq(6e7PJOuR(nGku=_L-s>v&GvK~ zOQ~-Raz3Jk4+!ttZpCVS)7a;t^TIzz)482L;bw}eyM%kNY?Z<%@z#@vJ0L~lUSa1G zUBy>yh0{3g&-(ME(cvERM6|QWL|~Z{=Yxeqj%W*23tyC{-=1~y$j+ST^sVmX-cK{{ zL{V_e4p`sfu{zt`lr|i!u$Lvs>f3VPV&Kmo6g+oMnQSmMn+0E(b>K_PkQh&)NZmHZ z7Nq3;Y^J2`;==NKL@W83aAinJ%+uhJK4bE}!MW!BdE7to3 zj?#U%(0Qf|I)mVl$f^t>BK+tyq*m4lT;QoBaBNqA<^#eFFt<7T}>J)~-|CI9+-rKE^W( z>De5ptWOjdJ~>%{hXj5fzYc}YYx5}=G*IxW52>k#ca8_#Zb8%6(qe=T$9&3+p4=Tv zGr@MOuDV?MLx(7Ok0^_Pl}qBQ%j&F)Z!gg*hEB~jaO_aYb-Cn@&-6_Q38{Hz`8>~e z;h6v;y*bu^j@yH(|CucPE9TCbG@%D3BAk~c6B$f6E|w0^oGr|=Nip8<6g^}rJ9GZ< z^jGq_FVf#P81rUPo5Z@ew$ZCEAG@dtW>zx^+N$8e*9koeBriiscHN=v)u93&zC}Nt z3RvD1QBAS!41Mcad_)7SCR-@*Zt$`G-6J0C%)uMTvstu;UiUUl_JSfk1sXg{3Op;% zDB`DNHdwYFfUzUmX|E4=QAeZMm6Y8JkJ4Wb-TwG|o_1mHN0F#VE&AqEEBd_LZeGWW zK{k>*UhORB?i?z4|Bb08-ZZ#tRIOUjDO7#R*!fkmCrcv*o@zp5!Xl@Z@or|(fqq>4 z>+;fg7s>oL_r{7Qm!It)JaXiXc=QHC+ANmBfEdX&FL({wwgX60vIMi<@EBSWKwRiqB&a4u4`5cxh6UWR+TgPKKkr%a$~zQ<5muYV7Y z`MCqFUNZSjJ^5&QGAsVW2Tx7iR3U12$L3|a*FIigP#`ld|5r7@lm2-@r)2GIPl`(g zL7zvhO?{0)tS1Npb+Yz?LsGpV^sf+2RW}fH2}hjD$RLm^&JGUta6Bjo;WUQ;T0(;O zP+NNli-(YIzQ=6ry}X?r-5?wRLVDPHIlJ3IVDm!;*?YM|NOw07y6NK&NI^ba!39FPLdau4IM6?k3B-)zAS8$<1w#d4w!RS3 z4?<(m5Hb+5T#ZKkJz!uQNHu3KZy#7GKyye~T}mqvuVQEvU=za;un<~;hy#s)r`jOi z6|SOU?``Yl?BV0?1)*Vl4dPs3vaYp{moq6^Rvd+bod@{)T>`8+*1vj!pYI^`td9W| z{h1mB(%9F=hZKAa2hDP?Jyy{KUNKyegPk_5XV z0ZTv-0Zx+u2W&*Z*G$v3eg1>cRRIcVnw51 z1xVsR0u2!*!5RVAU@783=Mr$jV;~F>j{pg*D*}WEM>k1erxIW(N=O0$z%n7_@O`jw z2_RBU0`S9a5+#WsU!lP{X)ss`1f0V{z=MJl0gWZ`;=#aI+5mMyzoar22NWSmN_Yt( zDJMz*c>$e(#()8?^os-i;=v-q5@9|Zga^4KP!^n%5(chEASnUP;r>aJ1}hNA02GRV z`@{ntl2TTMgL9Il!9C&N3lyv(SZsJ`a1Ih_KH%25$x3@y8ycp;jgr66RSC_6fo}yQVy5@N~F17xewR*onX6wXBEyZe{T+d zpClC_Qj8_=70M7Pnh}&VAux;Z#t1wL21cErqzi!wC6)9b42*&R%?JXM3rb+Gg-CHm zfMx>hcjc}rgdw>^*zJHVl*C~Rk)jMhJv(3#D>UE@NqzisZKSuIpaSgeATSgXpCjxs zNhN2nIjodiAh6m=G*<|ypHw;qRtH={00%~Tt_>Rc+t2>$(j6i_Ed@7WCyVXPL1fg^(}{MpCXz+bQyRyzN4A7);SAW}rHe3qgJXun*=pAT!0w$>2o zRDe{(tX2bVe8r<|SPi>c4J8gB$wn378**}gNefH=OPW6-{^e2s5aDsG%B>4FoK+(O zL;R%~T?_=E$69TWs;#oEI$Eqa9ukB9ttJ>%{I9QIu;{Vm8)@}s75W}DGANBGcF7Bq$)+-wh zdKTARtrNXtw5L?~@l`{y-cDc1!Y5rGwm#=eSo5FvjtAsieoq)okpFi3AfK8%5B;sM z3t% z1!xs4YHepb__RU#q|FEh*(>I~@>$-WH3*>2U(DcxFdt`kHx>9RJYf|n3<^y|VK7(} z3WG)C4v8R;%5V(;(S?s$`*^sz`?xyWKp1iGCki?F_;^Smk$-N8yL&kT7H}}K^R)$^ zqy5d}VdntZSlhY)q`JZ{^4AU$;AQWC0H8qv0Z`ko9{^=|u=_heziIF(0scX5zi4P2 z28Qx$X;?HK?8)nCM6esLqhVo}vz`X;%j;Po)?D(z<*tPI6MIO z>uHjNf9m1U8_;k7K(6D(OM+n3^)%wYyaX&bp|7h)z{74G{{7K2fdCNrdKwHc*U==< z|JXfB0t;gi`1hwh3H-n9Nl1c&&boS%DAYf^k}z~%Uk?Z4Eco}Q51_t(Xhak^$gHbJ zME|RMA{IQ$SYJ;99E#S`z*zu1z*tYi!T=8b{gwrd!u_KwGzvcat*eKFPoTePK3>+& zuJ&G}kC-*x8~}6!20;S4?(Xo;N-7&W2ib!S3f{E5eXPBFNLv;fhr?hIf`Y30YKZ>> DZd)AQ literal 0 HcmV?d00001 From 4fbcbfb977a4cdfdbdc0b4767ce08be3703bc659 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Mon, 25 Feb 2019 10:06:14 +0000 Subject: [PATCH 24/32] Fix to GPU error --- .../variational_continual_learning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 447522c..68b3a66 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -27,6 +27,7 @@ # Set the compute context, GPU is available otherwise CPU CTX = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu() +mx.context.Context.default_ctx = CTX def set_seeds(seed=42): From d537377a6c2455399eaf3b00a83399174976dcca Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Mon, 25 Feb 2019 11:32:39 +0000 Subject: [PATCH 25/32] Added python-fire for simple command line argument parsing (see https://github.com/google/python-fire) Some tidy up A few fixes for permuted mnist due to regressions --- .../experiment.py | 16 +- .../variational_continual_learning/mnist.py | 5 +- .../variational_continual_learning/models.py | 5 +- .../variational_continual_learning.py | 171 +++++++++--------- 4 files changed, 99 insertions(+), 98 deletions(-) diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index 31e038e..6d0f8ec 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -39,7 +39,6 @@ def __init__(self, network_shape, num_epochs, learning_rate, optimizer, data_gen self.vanilla_model = None self.bayesian_model = None - # self.prediction_models = dict() self.task_ids = [] @property @@ -63,15 +62,8 @@ def reset(self): print("Creating Vanilla Model") self.vanilla_model = VanillaNN(**self.model_params) - # print("Creating Bayesian Model") - # self.bayesian_model = BayesianNN(**self.model_params) - - # if self.single_head: - # print("Creating Prediction Model") - # self.prediction_models[0] = BayesianNN(**self.model_params) - def new_task(self, task): - if self.single_head: + if self.single_head and self.bayesian_model is not None: return if len(self.task_ids) > 0: @@ -82,10 +74,6 @@ def new_task(self, task): # TODO: Would be nice if we could use the same object here self.bayesian_model = BayesianNN(**self.model_params) - # if len(self.coreset.iterator) > 0: - # # We'll keep the prediction model for each task since they'll get reused - # self.prediction_models[task.task_id] = BayesianNN(**self.model_params) - def run(self): self.reset() @@ -117,7 +105,6 @@ def run(self): batch_size=batch_size) priors = self.vanilla_model.net.collect_params() - # print("Number of variables in priors: {}".format(len(priors.items()))) train_iterator.reset() self.new_task(task) @@ -171,7 +158,6 @@ def fine_tune(self, task_id): coreset_iterator.reset() batch_size = coreset_iterator.provide_label[0].shape[0] - # prediction_model = self.prediction_models[task_id] prediction_model = BayesianNN(**self.model_params) priors = self.bayesian_model.posteriors diff --git a/examples/variational_continual_learning/mnist.py b/examples/variational_continual_learning/mnist.py index ba17930..5469795 100644 --- a/examples/variational_continual_learning/mnist.py +++ b/examples/variational_continual_learning/mnist.py @@ -90,5 +90,8 @@ def __iter__(self): batch_size = self.batch_size or x_test.shape[0] test_iter = NDArrayIter(x_test, y_test, batch_size) - yield Task(i, task, train_iter, test_iter, number_of_classes=y_train.shape[1]) + # number_of_classes = y_train.shape[1] + number_of_classes = len(np.unique(y_train)) + + yield Task(i, task, train_iter, test_iter, number_of_classes) return diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 738b166..8f91185 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -119,7 +119,10 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p cumulative_loss = 0 for i, batch in enumerate(train_iterator): with mx.autograd.record(): - output = self.forward(batch.data[0].as_in_context(self.ctx))[head] + if self.single_head: + output = self.forward(batch.data[0].as_in_context(self.ctx)) + else: + output = self.forward(batch.data[0].as_in_context(self.ctx))[head] labels = batch.label[0].as_in_context(self.ctx) loss = self.loss(output, labels) loss.backward() diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 68b3a66..0b752e0 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -17,6 +17,7 @@ import matplotlib.pyplot as plt from datetime import datetime +import fire from examples.variational_continual_learning.experiment import Experiment from examples.variational_continual_learning.mnist import SplitTaskGenerator, PermutedTaskGenerator @@ -57,87 +58,95 @@ def plot(title, experiments, num_tasks): plt.close() +class VCLRunner: + """ + Entry point for variational continual learning examples + """ + @staticmethod + def run(task='split', learning_rate=0.01, optimizer='adam', num_epochs=120, num_tasks=None): + # Load data + data = mx.test_utils.get_mnist() + input_dim = int(np.prod(data['train_data'][0].shape)) # Note the data will get flattened later + verbose = False + + # noinspection PyUnreachableCode + if task.lower() == 'split': + title = "Split MNIST" + tasks = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)) + tasks = tasks[:num_tasks] + # num_epochs = 120 + # num_epochs = 1 # 120 + batch_size = None + gen = SplitTaskGenerator + label_shape = 2 + network_shape = (input_dim, 256, 256, (label_shape,)) + single_head = False + coreset_size = 40 + elif task.lower() == 'permuted': + title = "Permuted MNIST" + tasks = range(10) + tasks = tasks[:num_tasks] + # num_epochs = 100 + # num_epochs = 1 + batch_size = 256 + gen = PermutedTaskGenerator + label_shape = 10 + network_shape = (input_dim, 100, 100, label_shape) + single_head = True + coreset_size = 200 + else: + raise ValueError("Unknown task type {}, possibilities are 'split' or 'permuted'".format(task)) + + experiment_parameters = ( + dict( + coreset=Vanilla(), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=single_head), + dict( + coreset=Random(coreset_size=coreset_size), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=single_head), + dict( + coreset=KCenter(coreset_size=coreset_size), + learning_rate=learning_rate, + optimizer=optimizer, + network_shape=network_shape, + num_epochs=num_epochs, + single_head=single_head) + ) + + experiments = [] + + print("Task {}\nLearning rate {}\nOptimizer {}\nnumber of epochs {}\nnumber of tasks {}".format( + title, learning_rate, optimizer, num_epochs, num_tasks + )) + + # Run experiments + for params in experiment_parameters: + print("-" * 50) + print("Running experiment", params['coreset'].__class__.__name__) + print("-" * 50) + set_seeds() + experiment = Experiment(batch_size=batch_size, + data_generator=gen(data, batch_size=batch_size, tasks=tasks), + ctx=CTX, verbose=verbose, + **params) + experiment.run() + print(experiment.overall_accuracy) + experiments.append(experiment) + print("-" * 50) + print() + + plot(title, experiments, len(tasks)) + + if __name__ == "__main__": import warnings warnings.filterwarnings("ignore", category=UserWarning) - - # Load data - data = mx.test_utils.get_mnist() - input_dim = int(np.prod(data['train_data'][0].shape)) # Note the data will get flattened later - verbose = False - - # noinspection PyUnreachableCode - if True: - title = "Split MNIST" - tasks = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)) - num_epochs = 120 - # tasks = ((0, 1), (2, 3)) - # num_epochs = 1 # 120 - batch_size = None - gen = SplitTaskGenerator - label_shape = 2 - network_shape = (input_dim, 256, 256, (label_shape, )) - single_head = False - coreset_size = 40 - else: - title = "Permuted MNIST" - tasks = range(10) - num_epochs = 100 - # tasks = range(2) - # num_epochs = 1 - batch_size = 256 - gen = PermutedTaskGenerator - label_shape = 10 - network_shape = (input_dim, 100, 100, label_shape) - single_head = True - coreset_size = 200 - - data_dtype = data['train_data'].dtype - label_dtype = data['train_label'].dtype - - learning_rate = 0.01 - optimizer = 'adam' - - experiment_parameters = ( - dict( - coreset=Vanilla(), - learning_rate=learning_rate, - optimizer=optimizer, - network_shape=network_shape, - num_epochs=num_epochs, - single_head=single_head), - dict( - coreset=Random(coreset_size=coreset_size), - learning_rate=learning_rate, - optimizer=optimizer, - network_shape=network_shape, - num_epochs=num_epochs, - single_head=single_head), - dict( - coreset=KCenter(coreset_size=coreset_size), - learning_rate=learning_rate, - optimizer=optimizer, - network_shape=network_shape, - num_epochs=num_epochs, - single_head=single_head) - ) - - experiments = [] - - # Run experiments - for params in experiment_parameters: - print("-" * 50) - print("Running experiment", params['coreset'].__class__.__name__) - print("-" * 50) - set_seeds() - experiment = Experiment(batch_size=batch_size, - data_generator=gen(data, batch_size=batch_size, tasks=tasks), - ctx=CTX, verbose=verbose, - **params) - experiment.run() - print(experiment.overall_accuracy) - experiments.append(experiment) - print("-" * 50) - print() - - plot(title, experiments, len(tasks)) + fire.Fire(VCLRunner.run) From 526aae3a52949320765b83547c824905a0b8ffd4 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Tue, 26 Feb 2019 11:09:43 +0000 Subject: [PATCH 26/32] Fix regressions after merge --- examples/variational_continual_learning/models.py | 1 + mxfusion/inference/grad_based_inference.py | 2 -- mxfusion/inference/inference_alg.py | 5 ++++- mxfusion/models/factor_graph.py | 2 ++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 8f91185..31c6b63 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -314,6 +314,7 @@ def prediction_prob(self, test_iter, head): new_posterior[r].factor.successors = [(k, new_posterior[v]) for k, v in r.factor.successors] new_posterior[y].set_prior(new_posterior[y.factor]) new_posterior[y].factor.predecessors = [(k, new_posterior[v]) for k, v in y.factor.predecessors] + new_posterior[y].factor.successors = [(k, new_posterior[v]) for k, v in y.factor.successors] # Set the posterior to be the new posterior self.inference.inference_algorithm._extra_graphs[0] = new_posterior diff --git a/mxfusion/inference/grad_based_inference.py b/mxfusion/inference/grad_based_inference.py index c4a1f2d..715186e 100644 --- a/mxfusion/inference/grad_based_inference.py +++ b/mxfusion/inference/grad_based_inference.py @@ -82,8 +82,6 @@ def run(self, optimizer='adam', learning_rate=1e-3, max_iter=2000, the corresponding variable in model definition and the value of the argument is the data in numpy array format. """ data = [kwargs[v] for v in self.observed_variable_names] - ignored = [kwargs['ignored'][v] for v in self.observed_variable_names - if 'ignored' in kwargs and v in kwargs['ignored']] self.initialize(**kwargs) infr = self.create_executor() diff --git a/mxfusion/inference/inference_alg.py b/mxfusion/inference/inference_alg.py index 9cce30b..fd0a150 100644 --- a/mxfusion/inference/inference_alg.py +++ b/mxfusion/inference/inference_alg.py @@ -108,7 +108,7 @@ def replicate_self(self, model, extra_graphs=None): replicant._observed_names = [v.name for v in observed] return replicant - def __init__(self, model, observed, extra_graphs=None): + def __init__(self, model, observed, extra_graphs=None, ignored=None): """ Initialize the algorithm @@ -118,6 +118,9 @@ def __init__(self, model, observed, extra_graphs=None): :type observed: [Variable] :param extra_graphs: a list of extra FactorGraph used in the inference algorithm. :type extra_graphs: [FactorGraph] + :param ignored: A list of ignored variables. + These are variables that are not observed, but also will not be inferred + :type ignored: [Variable] """ self._model_graph = model self._extra_graphs = extra_graphs if extra_graphs is not None else [] diff --git a/mxfusion/models/factor_graph.py b/mxfusion/models/factor_graph.py index ebf34c5..6386520 100644 --- a/mxfusion/models/factor_graph.py +++ b/mxfusion/models/factor_graph.py @@ -278,6 +278,8 @@ def draw_samples(self, F, variables, num_samples=1, targets=None, ignored=None): elif any(known): raise InferenceError("Part of the outputs of the distribution " + f.__class__.__name__ + " has been observed!") + if any(v in ignored for (_, v) in f.outputs): + continue outcome_uuid = [v.uuid for _, v in f.outputs] outcome = f.draw_samples( F=F, num_samples=num_samples, variables=variables, always_return_tuple=True) From 56565df1f4c8f2a7db0b57f2f6456f77286bf068 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Tue, 26 Feb 2019 11:11:35 +0000 Subject: [PATCH 27/32] Fix bug in plotting --- .../variational_continual_learning.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/examples/variational_continual_learning/variational_continual_learning.py b/examples/variational_continual_learning/variational_continual_learning.py index 0b752e0..d1040b8 100644 --- a/examples/variational_continual_learning/variational_continual_learning.py +++ b/examples/variational_continual_learning/variational_continual_learning.py @@ -36,28 +36,6 @@ def set_seeds(seed=42): np.random.seed(seed) -def plot(title, experiments, num_tasks): - fig = plt.figure(figsize=(num_tasks, 3)) - ax = plt.gca() - - x = range(1, len(tasks) + 1) - - for experiment in experiments: - acc = np.nanmean(experiment.overall_accuracy, axis=1) - label = experiment.coreset.__class__.__name__ - plt.plot(x, acc, label=label, marker='o') - ax.set_xticks(x) - ax.set_ylabel('Average accuracy') - ax.set_xlabel('# tasks') - ax.legend() - ax.set_title(title) - - filename = "vcl_{}_{}.pdf".format(title, datetime.now().isoformat()[:-7]) - fig.savefig(filename, bbox_inches='tight') - plt.show() - plt.close() - - class VCLRunner: """ Entry point for variational continual learning examples @@ -143,7 +121,29 @@ def run(task='split', learning_rate=0.01, optimizer='adam', num_epochs=120, num_ print("-" * 50) print() - plot(title, experiments, len(tasks)) + VCLRunner.plot(title, experiments, len(tasks)) + + @staticmethod + def plot(title, experiments, num_tasks): + fig = plt.figure(figsize=(num_tasks, 3)) + ax = plt.gca() + + x = range(1, num_tasks + 1) + + for experiment in experiments: + acc = np.nanmean(experiment.overall_accuracy, axis=1) + label = experiment.coreset.__class__.__name__ + plt.plot(x, acc, label=label, marker='o') + ax.set_xticks(x) + ax.set_ylabel('Average accuracy') + ax.set_xlabel('# tasks') + ax.legend() + ax.set_title(title) + + filename = "vcl_{}_{}.pdf".format(title, datetime.now().isoformat()[:-7]) + fig.savefig(filename, bbox_inches='tight') + plt.show() + plt.close() if __name__ == "__main__": From 422a7dc3eeb1c773f43884a0581747f5290b2b5b Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Tue, 26 Feb 2019 14:14:29 +0000 Subject: [PATCH 28/32] Fix off-by-one error in printing --- mxfusion/inference/batch_loop.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mxfusion/inference/batch_loop.py b/mxfusion/inference/batch_loop.py index fd69bb4..67d215c 100644 --- a/mxfusion/inference/batch_loop.py +++ b/mxfusion/inference/batch_loop.py @@ -58,13 +58,15 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', loss, loss_for_gradient = infr_executor(mx.nd.zeros(1, ctx=ctx), *data) loss_for_gradient.backward() if verbose: - print('\rIteration {} loss: {}'.format(i + 1, loss.asscalar()), + print('\rIteration {} loss: {}'.format(i, loss.asscalar()), end='') if i % iter_step == 0 and i > 0: print() if callback is not None: - callback(i + 1, loss.asscalar()) + callback(i, loss.asscalar()) trainer.step(batch_size=1, ignore_stale_grad=True) loss, _ = infr_executor(mx.nd.zeros(1, ctx=ctx), *data) + if verbose: + print('\rIteration {} loss: {}'.format(max_iter, loss.asscalar()), end='') if callback is not None: - callback(max_iter + 1, loss.asscalar()) + callback(max_iter, loss.asscalar()) From 17f68415a947018ff5f81c07679211d49f768c13 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Tue, 26 Feb 2019 17:37:33 +0000 Subject: [PATCH 29/32] Fix in the case of ignored==None --- mxfusion/inference/forward_sampling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mxfusion/inference/forward_sampling.py b/mxfusion/inference/forward_sampling.py index 39dd443..638b143 100644 --- a/mxfusion/inference/forward_sampling.py +++ b/mxfusion/inference/forward_sampling.py @@ -117,6 +117,7 @@ def merge_posterior_into_model(model, posterior, observed, ignored=None): These are variables that are not observed, but also will not be inferred :type ignored: [Variable] """ + ignored = ignored or set() new_model = model.clone() for lv in model.get_latent_variables(observed): # Test if lv is in ignored From d73ebad40dce279183e184c1e7c08026bce3041d Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Thu, 28 Mar 2019 18:31:17 +0000 Subject: [PATCH 30/32] Fixing coreset merging --- .../coresets.py | 9 ++ .../experiment.py | 14 +- .../variational_continual_learning/models.py | 128 +++++++++++++----- 3 files changed, 111 insertions(+), 40 deletions(-) diff --git a/examples/variational_continual_learning/coresets.py b/examples/variational_continual_learning/coresets.py index a514970..a61463e 100644 --- a/examples/variational_continual_learning/coresets.py +++ b/examples/variational_continual_learning/coresets.py @@ -92,6 +92,15 @@ def update(self, iterator): def reset(self): self.iterator = MultiIter([]) + @staticmethod + def merge(coreset): + # For sizes 0 and 1 just return the original coreset + if len(coreset.iterator) <= 1: + return coreset + merged = coreset.__class__(coreset_size=coreset.coreset_size) + merged.append() + raise NotImplementedError + class Vanilla(Coreset): """ diff --git a/examples/variational_continual_learning/experiment.py b/examples/variational_continual_learning/experiment.py index 6d0f8ec..c57727c 100644 --- a/examples/variational_continual_learning/experiment.py +++ b/examples/variational_continual_learning/experiment.py @@ -137,12 +137,13 @@ def get_coreset(self, task_id): """ if self.single_head: # TODO: Cache the results if this is expensive? - return Coreset.merge(self.coreset) + iterator = Coreset.merge(self.coreset).iterator else: - if len(self.coreset.iterator) > 0: - return self.coreset.iterator[task_id] - else: - return None + iterator = self.coreset.iterator + + if len(iterator) > 0: + return iterator[task_id] + return None def fine_tune(self, task_id): """ @@ -189,7 +190,8 @@ def get_scores(self): predicted_means = np.mean(predictions, axis=0) predicted_labels = np.argmax(predicted_means, axis=1) test_labels = test_iterator.label[0][1].asnumpy() - score = len(np.where(np.abs(predicted_labels - test_labels) < 1e-10)[0]) * 1.0 / test_labels.shape[0] + mt = test_labels.shape[0] + score = len(np.where(np.abs(predicted_labels[:mt] - test_labels) < 1e-10)[0]) * 1.0 / mt scores.append(score) return scores diff --git a/examples/variational_continual_learning/models.py b/examples/variational_continual_learning/models.py index 31c6b63..7b0e67b 100644 --- a/examples/variational_continual_learning/models.py +++ b/examples/variational_continual_learning/models.py @@ -21,8 +21,11 @@ from mxfusion.components import MXFusionGluonFunction from mxfusion.components.distributions import Normal, Categorical from mxfusion.inference import BatchInferenceLoop, create_Gaussian_meanfield, GradBasedInference, \ - StochasticVariationalInference, VariationalPosteriorForwardSampling -from mxfusion.components.variables import add_sample_dimension + StochasticVariationalInference, VariationalPosteriorForwardSampling, MinibatchInferenceLoop, \ + GradIteratorBasedInference + +import numpy as np + from abc import ABC, abstractmethod from .mlp import MLP @@ -190,35 +193,26 @@ def get_net_parameters(self, head): # noinspection PyUnresolvedReferences def train(self, train_iterator, validation_iterator, head, batch_size, epochs, priors=None): - for i, batch in enumerate(train_iterator): - if i > 0: - raise NotImplementedError("Currently not supported for more than one batch of data. " - "Please switch to using the MinibatchInferenceLoop") + if self.single_head: + print("Running single-headed inference") - data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) - labels = mx.nd.expand_dims(batch.label[0], axis=-1).as_in_context(self.ctx) + dummy = mx.nd.flatten(mx.nd.zeros(shape=train_iterator.provide_data[0].shape)).as_in_context(self.ctx) + x_shape = dummy.shape + y_shape = train_iterator.provide_label[0].shape if self.verbose: - print("Data shape {}".format(data.shape)) + print("Data shape {}".format(x_shape)) # pass some data to initialise the net - self.net(data[:1]) + self.net(dummy[:1]) - if self.single_head: - observed = [self.model.x, self.model.y] - ignored = None - kwargs = dict(y=labels, x=data) - else: - observed = [self.model.x, getattr(self.model, "y{}".format(head))] - y_other = [getattr(self.model, "y{}".format(h)) for h in range(self.num_heads) if h != head] - r_other = [getattr(self.model, "r{}".format(h)) for h in range(self.num_heads) if h != head] - ignored = y_other + r_other - kwargs = {'x': data, 'y{}'.format(head): labels, 'ignored': dict((v.name, v) for v in ignored)} - - q = create_Gaussian_meanfield(model=self.model, ignored=ignored, observed=observed) - alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed, - ignored=ignored) - self.inference = GradBasedInference(inference_algorithm=alg, grad_loop=BatchInferenceLoop()) + observed = [self.model.x, self.model.y] + kwargs = {'x': x_shape, 'y': y_shape} + + q = create_Gaussian_meanfield(model=self.model, observed=observed) + alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed) + + self.inference = GradIteratorBasedInference(inference_algorithm=alg) self.inference.initialize(**kwargs) for v in self.get_net_parameters(head).values(): @@ -250,12 +244,75 @@ def train(self, train_iterator, validation_iterator, head, batch_size, epochs, p self.inference.params.param_dict[mean_prior]._grad_req = 'null' self.inference.params.param_dict[variance_prior]._grad_req = 'null' - if self.single_head: - print("Running single-headed inference") - else: - print("Running multi-headed inference for head {}".format(head)) self.inference.run(max_iter=self.max_iter, learning_rate=self.learning_rate, - verbose=False, callback=self.print_status, **kwargs) + verbose=False, callback=self.print_status, data=train_iterator) + + else: + print("Running multi-headed inference for head {}".format(head)) + + for i, batch in enumerate(train_iterator): + if i > 0: + raise NotImplementedError("Currently not supported for more than one batch of data. " + "Please switch to using the MinibatchInferenceLoop") + + data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) + labels = mx.nd.expand_dims(batch.label[0], axis=-1).as_in_context(self.ctx) + + if self.verbose: + print("Data shape {}".format(data.shape)) + + # pass some data to initialise the net + self.net(data[:1]) + + if self.single_head: + observed = [self.model.x, self.model.y] + ignored = None + kwargs = dict(y=labels, x=data) + else: + observed = [self.model.x, getattr(self.model, "y{}".format(head))] + y_other = [getattr(self.model, "y{}".format(h)) for h in range(self.num_heads) if h != head] + r_other = [getattr(self.model, "r{}".format(h)) for h in range(self.num_heads) if h != head] + ignored = y_other + r_other + kwargs = {'x': data, 'y{}'.format(head): labels, 'ignored': dict((v.name, v) for v in ignored)} + + q = create_Gaussian_meanfield(model=self.model, ignored=ignored, observed=observed) + alg = StochasticVariationalInference(num_samples=5, model=self.model, posterior=q, observed=observed, + ignored=ignored) + + self.inference = GradBasedInference(inference_algorithm=alg) + self.inference.initialize(**kwargs) + + for v in self.get_net_parameters(head).values(): + v_name_mean = v.inherited_name + "_mean" + v_name_variance = v.inherited_name + "_variance" + + if priors is None or (v_name_mean not in priors and v_name_variance not in priors): + means = self.prior_mean(shape=v.shape) + variances = self.prior_variance(shape=v.shape) + elif isinstance(priors, ParameterDict): + # This is a maximum likelihood estimate + short_name = v.inherited_name.partition(self.prefix)[-1] + means = priors.get(short_name).data() + variances = self.prior_variance(shape=v.shape) + else: + # Use posteriors from previous round of inference + means = priors[v_name_mean] + variances = priors[v_name_variance] + + mean_prior = getattr(self.model, v_name_mean) + variance_prior = getattr(self.model, v_name_variance) + + # v.set_prior(Normal(mean=mean_prior, variance=variance_prior)) + + self.inference.params[mean_prior] = means + self.inference.params[variance_prior] = variances + + # Indicate that we don't want to perform inference over the priors + self.inference.params.param_dict[mean_prior]._grad_req = 'null' + self.inference.params.param_dict[variance_prior]._grad_req = 'null' + + self.inference.run(max_iter=self.max_iter, learning_rate=self.learning_rate, + verbose=False, callback=self.print_status, **kwargs) # noinspection PyUnresolvedReferences @property @@ -283,10 +340,12 @@ def prediction_prob(self, test_iter, head): if self.inference is None: raise RuntimeError("Model not yet learnt") + predictions = [] + for i, batch in enumerate(test_iter): - if i > 0: - raise NotImplementedError("Currently not supported for more than one batch of data. " - "Please switch to using the MinibatchInferenceLoop") + # if i > 0: + # raise NotImplementedError("Currently not supported for more than one batch of data. " + # "Please switch to using the MinibatchInferenceLoop") data = mx.nd.flatten(batch.data[0]).as_in_context(self.ctx) @@ -334,7 +393,8 @@ def prediction_prob(self, test_iter, head): # Set the posterior back to the old posterior self.inference.inference_algorithm._extra_graphs[0] = old_posterior - return res[0].asnumpy() + predictions.append(res[0].asnumpy()) + return np.concatenate(predictions, axis=1) @staticmethod def prior_mean(shape): From b72e396ec21036203993f72ce4a6cccf39258940 Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Thu, 28 Mar 2019 18:32:08 +0000 Subject: [PATCH 31/32] Adaptation of minibatch loop to work with mx.io.DataIter and mx.io.DataBatch --- mxfusion/inference/minibatch_loop.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mxfusion/inference/minibatch_loop.py b/mxfusion/inference/minibatch_loop.py index a085136..911b407 100644 --- a/mxfusion/inference/minibatch_loop.py +++ b/mxfusion/inference/minibatch_loop.py @@ -64,6 +64,8 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', if isinstance(data, mx.gluon.data.DataLoader): data_loader = data + elif isinstance(data, mx.io.DataIter): + data_loader = data else: data_loader = mx.gluon.data.DataLoader( ArrayDataset(*data), batch_size=self.batch_size, shuffle=True, @@ -77,18 +79,21 @@ def run(self, infr_executor, data, param_dict, ctx, optimizer='adam', n_batches = 0 for i, data_batch in enumerate(data_loader): with mx.autograd.record(): + if isinstance(data_batch, mx.io.DataBatch): + data_batch = (data_batch.data[0], data_batch.label[0]) loss, loss_for_gradient = infr_executor(mx.nd.zeros(1, ctx=ctx), *data_batch) loss_for_gradient.backward() if verbose: print('\repoch {} Iteration {} loss: {}\t\t\t'.format( e + 1, i + 1, loss.asscalar()), end='') + if callback is not None: + callback(i, loss.asscalar()) trainer.step(batch_size=self.batch_size, ignore_stale_grad=True) L_e += loss.asscalar() n_batches += 1 if verbose: - if callback is None: - print('epoch-loss: {} '.format(L_e / n_batches)) - else: - callback(e, L_e) + print('epoch-loss: {} '.format(L_e / n_batches)) + if callback is not None: + callback(e, L_e) From 565c61c5c4a5ea55f2a920b7a45b731b1302752f Mon Sep 17 00:00:00 2001 From: "Diethe, Tom" Date: Thu, 28 Mar 2019 18:32:30 +0000 Subject: [PATCH 32/32] Some tidying --- mxfusion/util/inference.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mxfusion/util/inference.py b/mxfusion/util/inference.py index 02f7601..79437a2 100644 --- a/mxfusion/util/inference.py +++ b/mxfusion/util/inference.py @@ -19,7 +19,10 @@ def broadcast_samples_dict(F, array_dict, num_samples=None): """ - Broadcast the shape of arrays in the provided dictionary. When the num_samples argument is given, all the sample dimesnions (the first dimension) of the arrays in the dictionary will be broadcasted to the size of num_samples. If the num_samples argument is not given, the sample dimensions of the arrays in the dictionary will be broadcasted to the maximum number of the sizes of the sample dimensions. + Broadcast the shape of arrays in the provided dictionary. When the num_samples argument is given, all the sample + dimensions (the first dimension) of the arrays in the dictionary will be broadcasted to the size of num_samples. + If the num_samples argument is not given, the sample dimensions of the arrays in the dictionary will be broadcasted + to the maximum number of the sizes of the sample dimensions. :param F: the execution mode of MXNet. :type F: mxnet.ndarray or mxnet.symbol @@ -65,8 +68,7 @@ def discover_shape_constants(data_shapes, graphs): variables in the model and inference models. :param data_shapes: a dict of shapes of data - :param graphs: a list of factor graphs of which variable shapes are - searched. + :param graphs: a list of factor graphs of which variable shapes are searched. :returns: a dict of constants discovered from data shapes :rtype: {Variable: int} """ @@ -79,11 +81,15 @@ def discover_shape_constants(data_shapes, graphs): for s1, s2 in zip(def_shape, shape): if isinstance(s1, int): if s1 != s2: - raise ModelSpecificationError("Variable ({}) shape mismatch between expected and found! s1 : {} s2 : {}".format(str(variables[var_id]),str(s1), str(s2))) + raise ModelSpecificationError( + "Variable ({}) shape mismatch between expected and found! s1 : {} s2 : {}" + .format(str(variables[var_id]),str(s1), str(s2))) elif isinstance(s1, Variable): shape_constants[s1] = s2 else: - raise ModelSpecificationError("The shape of a Variable should either an integer or a Variable, but encountered {}!".format(str(type(s1)))) + raise ModelSpecificationError( + "The shape of a Variable should either an integer or a Variable, but encountered {}!" + .format(str(type(s1)))) return shape_constants