diff --git a/Python/README.md b/Python/README.md
index 3a6fc84..61c7a98 100644
--- a/Python/README.md
+++ b/Python/README.md
@@ -99,6 +99,10 @@ Default: --dim 32 32 3
 ```
 Default: --seed 0
 ```
+12. **--profile**: enable profiling
+```
+Default: false
+```
 
 ### Newton Method
 
@@ -191,4 +195,4 @@ Memory | bsize 1024 | bsize 512| bsize 256
 10% sub-sampled Gv|7.2 GB |3.8 GB|2.1 GB
 5% sub-sampled Gv |7.2 GB |3.8 GB|2.1 GB
 1% sub-sampled Gv |7.2 GB |3.8 GB|2.1 GB
-SGD |7.2 GB|3.8 GB|2.1 GB|
\ No newline at end of file
+SGD |7.2 GB|3.8 GB|2.1 GB|
diff --git a/Python/train.py b/Python/train.py
index e538b88..b1fcc65 100644
--- a/Python/train.py
+++ b/Python/train.py
@@ -1,3 +1,5 @@
+import os
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
 import pdb
 import numpy as np
 import tensorflow as tf
@@ -8,7 +10,10 @@ from net.net import CNN
 
 from newton_cg import newton_cg
 
-from utilities import read_data, predict, ConfigClass, normalize_and_reshape
+from utilities import (
+    read_data, predict, ConfigClass, normalize_and_reshape,
+    Profiler)
+
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Newton method on DNN')
@@ -93,6 +98,8 @@
                         default=[32, 32, 3], type=int)
     parser.add_argument('--seed', dest='seed', help='a nonnegative integer for \
                         reproducibility', type=int)
+    parser.add_argument('--profile', action='store_true',
+                        help='enable profiling')
 
     args = parser.parse_args()
     return args
@@ -149,7 +156,7 @@ def gradient_trainer(config, sess, network, full_batch, val_batch, saver, test_n
     log_file = open(config.log_file, 'w')
     print(config.args, file=log_file)
     sess.run(tf.compat.v1.global_variables_initializer())
-    
+
     print('-------------- initializing network by methods in He et al. (2015) --------------')
 
     param = tf.compat.v1.trainable_variables()
@@ -159,6 +166,8 @@
     best_acc = 0.0
 
     lr = config.lr
+    profiler = Profiler(config.args.profile)
+
     for epoch in range(0, args.epoch):
 
         loss_avg = 0.0
@@ -177,10 +186,13 @@
            batch_labels = np.ascontiguousarray(batch_labels)
            config.elapsed_time += time.time() - load_time
 
-            step, _, batch_loss= sess.run(
-                [global_step, optimizer, loss_with_reg],
-                feed_dict = {x: batch_input, y: batch_labels, learning_rate: lr}
-                )
+            with profiler:
+                step, _, batch_loss= sess.run(
+                    [global_step, optimizer, loss_with_reg],
+                    feed_dict = {x: batch_input, y: batch_labels, learning_rate: lr},
+                    options=profiler.run_options,
+                    run_metadata=profiler.run_metadata
+                )
 
            # print initial loss
            if epoch == 0 and i == 0:
@@ -251,8 +263,13 @@
        .format(val_acc*100, best_acc*100, total_running_time)
    print(output_str)
+    summary = profiler.summary()
+    if config.args.profile:
+        print(summary)
    if not config.screen_log_only:
        print(output_str, file=log_file)
+        if config.args.profile:
+            print(summary, file=log_file)
        log_file.close()
 
 
 def newton_trainer(config, sess, network, full_batch, val_batch, saver, test_network):
diff --git a/Python/utilities.py b/Python/utilities.py
index 4d1a58c..dfd5e98 100644
--- a/Python/utilities.py
+++ b/Python/utilities.py
@@ -1,9 +1,11 @@
-import numpy as np
 import math
-import scipy.io as sio
 import os
-import math
-import pdb
+
+import numpy as np
+import scipy.io as sio
+import tensorflow as tf
+from tensorflow.python import _pywrap_stat_summarizer
+
 
 class ConfigClass(object):
     def __init__(self, args, num_data, num_cls):
@@ -177,3 +179,34 @@
 
     return avg_loss, avg_acc, results
 
+
+class Profiler:
+    def __init__(self, is_enabled=False):
+        self._is_enabled = is_enabled
+        self.run_metadata = None
+        self._summarizer = _pywrap_stat_summarizer.StatSummarizer()
+
+        if self._is_enabled:
+            self.run_options = tf.compat.v1.RunOptions(
+                trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
+        else:
+            self.run_options = None
+
+    def add_stat(self, run_metadata):
+        self._summarizer.ProcessStepStatsStr(
+            run_metadata.step_stats.SerializeToString())
+
+    def __enter__(self):
+        if self._is_enabled:
+            if self.run_metadata is not None:
+                raise RuntimeError('Recursively called')
+            self.run_metadata = tf.compat.v1.RunMetadata()
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        if self._is_enabled:
+            self.add_stat(self.run_metadata)
+            self.run_metadata = None
+
+    def summary(self):
+        return self._summarizer.GetOutputString()
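Below the patch, for reference only: a minimal sketch of how the new `Profiler` is intended to be driven, mirroring the `train.py` changes above. It assumes it is run from the repository's `Python/` directory; the toy graph, the feed values, and passing `True` (the equivalent of `--profile`) are illustrative assumptions, not part of the patch.
```
import tensorflow as tf

from utilities import Profiler

# The patch targets TF1-style graph/session execution.
tf.compat.v1.disable_eager_execution()

# Illustrative toy graph; the real model is built in net/net.py.
x = tf.compat.v1.placeholder(tf.float32, shape=[None, 4])
loss = tf.reduce_sum(tf.square(x))

profiler = Profiler(True)  # True plays the role of the --profile flag

with tf.compat.v1.Session() as sess:
    for _ in range(3):
        # Each profiled sess.run collects FULL_TRACE step stats into the
        # summarizer; with profiling disabled, run_options and run_metadata
        # are None and the call behaves exactly as before.
        with profiler:
            sess.run(loss,
                     feed_dict={x: [[1., 2., 3., 4.]]},
                     options=profiler.run_options,
                     run_metadata=profiler.run_metadata)

# Aggregated per-op timing table from TensorFlow's StatSummarizer.
print(profiler.summary())
```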