Commit df9cf75

Graeme Nail (graemenail) authored and committed
Debugging
1 parent 7e67124 commit df9cf75

8 files changed, +43 -24 lines changed

src/graph/expression_graph.h

Lines changed: 12 additions & 5 deletions
@@ -34,15 +34,15 @@ class Tensors {
 
 public:
   Tensors(Ptr<Backend> backend)
-      : tensors_(New<TensorAllocator>(backend)),
-        cache_(New<TensorAllocator>(backend)),
+      : tensors_(New<TensorAllocator>(backend, "tensors")),
+        cache_(New<TensorAllocator>(backend, "cache")),
         shortterm_(New<WeakMemory>()),
         longterm_(New<Memory>())/*,
         midterm_(New<ShortlistMemory>())*/ {}
 
   Tensors(Ptr<Backend> backend, Ptr<Device> device)
       : tensors_(New<TensorAllocator>(backend, device)),
-        cache_(New<TensorAllocator>(backend)),
+        cache_(New<TensorAllocator>(backend, "cache")),
         shortterm_(New<WeakMemory>()),
         longterm_(New<Memory>())/*,
         midterm_(New<ShortlistMemory>())*/ {}
 
@@ -74,6 +74,8 @@ class Tensors {
 
   Expr findOrRemember(Expr node) {
     size_t hash = node->hash();
+
+    LOG(debug, "Looking for {}", node->name());
     // memoize constant nodes that are not parameters
     // parameters are already memoized in the graph itself
 
@@ -117,6 +119,7 @@ class Tensors {
     auto it = longterm_->find(hash);
     if(it != longterm_->end()) {
       for(auto found : it->second) {
+        LOG(debug, "Found (LT): {}", found->name());
         return found;
         // @TODO: check why below code does not work for certain nodes and
         // autotuning.
 
@@ -127,14 +130,18 @@ class Tensors {
       }
     }
 
-    //std::cerr << "Longterm: " << longterm_->size() << " " << node->name() << " Type: " << node->type() << " shape: " << node->shape() << std::endl;
+    // std::cerr << "Longterm: " << longterm_->size() << " " << node->name() << " Type: " << node->type() << " shape: " << node->shape() << std::endl;
+    LOG(debug, "Insert (LT): {}", node->name());
     (*longterm_)[hash].push_back(node);
   }
 
   auto it = shortterm_->find(hash);
   if(it != shortterm_->end()) {
     for(auto found : it->second) {
       if(node->equal(found)) {
+        std::ostringstream addr;
+        addr << &node;
+        LOG(debug, "Found (ST): {} @ {}", found->name(), addr.str());
         return found;
       }
     }
 
@@ -170,7 +177,7 @@ class ExpressionGraph : public std::enable_shared_from_this<ExpressionGraph> {
   Ptr<Tensors> tensors_;
 private:
 
-  std::unordered_map<size_t, std::vector<Expr>> memoized_;
+  // std::unordered_map<size_t, std::vector<Expr>> memoized_;
 
   Type defaultElementType_{Type::float32}; // Type used for storing parameters, currently all parameters have to have the same type
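
A side note on the short-term trace added above: streaming &node into the ostringstream records the address of the local Expr handle, not of the node it refers to. Below is a minimal, self-contained sketch of the lookup-and-trace pattern, with std::cerr standing in for Marian's LOG(debug, ...) macro and a simplified Node/Expr in place of the real graph types:

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-ins for Marian's graph types (hypothetical, for illustration).
struct Node { std::string name; size_t hash; };
using Expr = std::shared_ptr<Node>;

std::unordered_map<size_t, std::vector<Expr>> longterm;

Expr findOrRemember(Expr node) {
  std::cerr << "Looking for " << node->name << "\n";    // LOG(debug, "Looking for {}", ...)
  auto it = longterm.find(node->hash);
  if(it != longterm.end()) {
    for(auto& found : it->second) {
      std::cerr << "Found (LT): " << found->name << "\n";
      return found;                                     // reuse the memoized node
    }
  }
  std::cerr << "Insert (LT): " << node->name << "\n";   // first sighting: remember it
  longterm[node->hash].push_back(node);
  return node;
}

int main() {
  auto a = std::make_shared<Node>(Node{"dropout_mask", 42});
  findOrRemember(a);  // prints "Looking for ..." then "Insert (LT): ..."
  findOrRemember(a);  // prints "Looking for ..." then "Found (LT): ..."
}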

src/graph/node.h

Lines changed: 4 additions & 0 deletions
@@ -51,6 +51,10 @@ class Node : public Chainable<Tensor> {
       : graph_(graph), shape_(shape), valueType_(valueType) {}
 
   virtual ~Node() {
+    std::ostringstream addr;
+    addr << this;
+    LOG(debug, "Destroying ({}): {}", addr.str(), this->name());
+
     free();
   }
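
The destructor trace uses the same ostringstream idiom to capture the object's address, this time via this. A minimal sketch of the pattern, assuming a hypothetical DummyNode class and std::cerr in place of LOG(debug, ...):

#include <iostream>
#include <sstream>
#include <string>

class DummyNode {  // hypothetical stand-in for Marian's Node
  std::string name_;
public:
  explicit DummyNode(std::string name) : name_(std::move(name)) {}
  ~DummyNode() {
    std::ostringstream addr;
    addr << this;  // streaming a pointer prints its address, e.g. 0x7ffd...
    std::cerr << "Destroying (" << addr.str() << "): " << name_ << "\n";
  }
};

int main() {
  DummyNode n("affine_0");  // the trace fires when n goes out of scope
}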

src/graph/parameters.h

Lines changed: 2 additions & 2 deletions
@@ -73,8 +73,8 @@ class Parameters {
   }
 
   virtual void init(Ptr<Backend> backend) {
-    vals_ = New<TensorAllocator>(backend);
-    grads_ = New<TensorAllocator>(backend);
+    vals_ = New<TensorAllocator>(backend, "vals");
+    grads_ = New<TensorAllocator>(backend, "grads");
   }
 
   virtual void init(Ptr<Backend> backend, Ptr<Device> device) {

src/optimizers/optimizers.cpp

Lines changed: 4 additions & 4 deletions
@@ -21,7 +21,7 @@ void Sgd::updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t re
 void Adagrad::updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBWords) {
   ABORT_IF(actualMBSize != refMBWords, "Adagrad does not support rational hyper-parameter adjustment");
   if(!alloc_)
-    alloc_ = New<TensorAllocator>(params->getBackend());
+    alloc_ = New<TensorAllocator>(params->getBackend(), "Adagrad::updateImpl");
 
   if(!gt_) {
     int elements = (int)params->size();
 
@@ -76,7 +76,7 @@ void Adagrad::load(const std::string& name,
   auto opt = std::dynamic_pointer_cast<Adagrad>(opts[localDeviceIndex]);
   if(!opt->gt_) {
     if(!opt->alloc_)
-      opt->alloc_ = New<TensorAllocator>(backends[localDeviceIndex]);
+      opt->alloc_ = New<TensorAllocator>(backends[localDeviceIndex], "Adagrad::load");
     auto size = end - begin;
     opt->alloc_->reserveExact(sizeof(float) * size);
     opt->alloc_->allocate(opt->gt_, {1, (int)size});
 
@@ -124,7 +124,7 @@ void Adagrad::resetStats() {
 void Adam::updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBWords) {
   // lazy allocation
   if(!alloc_)
-    alloc_ = New<TensorAllocator>(params->getBackend());
+    alloc_ = New<TensorAllocator>(params->getBackend(), "Adam::updateImpl");
 
   if(!mt_) {
     int elements = (int)params->size();
 
@@ -226,7 +226,7 @@ void Adam::load(const std::string& name,
   auto opt = std::dynamic_pointer_cast<Adam>(opts[localDeviceIndex]);
   if(!opt->mt_ || !opt->vt_) { // lazily allocate
     if(!opt->alloc_)
-      opt->alloc_ = New<TensorAllocator>(backends[localDeviceIndex]);
+      opt->alloc_ = New<TensorAllocator>(backends[localDeviceIndex], "Adam::load");
     auto size = end-begin;
     opt->alloc_->reserveExact(2 * sizeof(float) * size);
     opt->alloc_->allocate(opt->mt_, {1, (int)size});
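
All four hunks apply the same lazy-allocation idiom, now tagged with the call site so the allocator's constructor log can attribute it. A minimal sketch of that idiom under the same naming convention, with a hypothetical TensorAllocator stand-in:

#include <iostream>
#include <memory>
#include <string>

struct TensorAllocator {  // hypothetical stand-in for Marian's class
  explicit TensorAllocator(std::string name) {
    std::cerr << "allocator created by " << name << "\n";
  }
};

struct Adam {
  std::shared_ptr<TensorAllocator> alloc_;
  void updateImpl() {
    if(!alloc_)  // allocate once, on first use, tagged with the caller's name
      alloc_ = std::make_shared<TensorAllocator>("Adam::updateImpl");
  }
};

int main() {
  Adam adam;
  adam.updateImpl();  // logs once
  adam.updateImpl();  // no further allocation on later updates
}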

src/optimizers/quantizer.cpp

Lines changed: 8 additions & 8 deletions
@@ -33,8 +33,8 @@ static void fixedPointQuantization(Tensor data, Tensor res, int numCenters, floa
 }
 
 /* simulate a log-based quantization for values in data. The quantized value will be in the form of
- * S*2^q For example:
- * data = [0.9, 0.7, 0.5, 0.2 , 1.1]
+ * S*2^q For example:
+ * data = [0.9, 0.7, 0.5, 0.2 , 1.1]
  * res = [1, 0.5, 0.5, 0.25, 1 ]
  *
  * @param data contains the original data.
 
@@ -86,7 +86,7 @@ void ModelQuantizer::quantize(Ptr<ExpressionGraph> graph) {
   LOG(info, "Quantizing the model to {}-bits", bits_);
 
   int numElements = (int)graph->params()->vals()->size();
-  auto allocator = New<TensorAllocator>(graph->getBackend());
+  auto allocator = New<TensorAllocator>(graph->getBackend(), "ModelQuantizer::quantize");
   allocator->reserveExact(graph->params()->vals()->memory()->size());
   allocator->allocate(errorResidual_, {1, numElements});
 
@@ -99,7 +99,7 @@ void ModelQuantizer::quantize(Ptr<ExpressionGraph> graph) {
   {
     // apply error feedback mechanism
     using namespace functional;
-    Element(_1 += _2, graph->params()->vals(), errorResidual_); // add the previous error residual to the current model
+    Element(_1 += _2, graph->params()->vals(), errorResidual_);  // add the previous error residual to the current model
     errorResidual_->copyFrom(graph->params()->vals()); // set the model as the error-residual (will be updated below)
   }
 
@@ -127,7 +127,7 @@ void ModelQuantizer::quantize(Ptr<ExpressionGraph> graph) {
 void ModelQuantizer::quantizeImpl(Tensor t) {
   if(!tempVar_) {
     // init the swap tensor
-    auto allocator = New<TensorAllocator>(t->getBackend());
+    auto allocator = New<TensorAllocator>(t->getBackend(), "ModelQuantizer::quantizeImpl");
     allocator->reserveExact(sizeof(float));
     allocator->allocate(tempVar_, {1, 1});
     allocators_.push_back(allocator);
 
@@ -136,12 +136,12 @@ void ModelQuantizer::quantizeImpl(Tensor t) {
   // init additional tensor for scaling optimization
   if(!delta_ && optSteps_ > 0) {
     int msize = (int) errorResidual_->size();
-    auto allocator = New<TensorAllocator>(errorResidual_->getBackend());
+    auto allocator = New<TensorAllocator>(errorResidual_->getBackend(), "ModelQuantizer::quantizeImpl");
     allocator->reserveExact(msize * sizeof(float));
     allocator->allocate(delta_, {1, msize});
     allocators_.push_back(allocator);
   }
-
+
   Tensor tflat = t->subtensor(0, t->size()); // flatten t for reduce
 
   float S = 0.0f; // scaling factor S
 
@@ -155,7 +155,7 @@ void ModelQuantizer::quantizeImpl(Tensor t) {
   // optimize the scaling factor S
   for(int i = 0; i < optSteps_; i++) {
     Tensor q = delta_->subtensor(0, t->size()); // to store the quantized t
-
+
     // let t be the original tensor, and q be the quantized tensor, and q = S*a where S is the
     // scaling factor. we want to optimize S to minimize MSE(S*a - t) therefore, S =
     // sum(a*t)/sum(a*a) see https://www.aclweb.org/anthology/2020.ngt-1.4.pdf for more details.

src/tensors/tensor_allocator.h

Lines changed: 11 additions & 4 deletions
@@ -18,15 +18,22 @@ class TensorAllocator {
 
   Ptr<Backend> backend_;
   Ptr<Allocator> allocator_;
+  std::string name_;
 
 public:
-  TensorAllocator(Ptr<Backend> backend)
+  TensorAllocator(Ptr<Backend> backend, std::string name = "no_name")
       : backend_(backend),
-        allocator_(New<Allocator>(backend_->getDeviceId(), 0, GROW, ALIGN)) {}
+        allocator_(New<Allocator>(backend_->getDeviceId(), 0, GROW, ALIGN)),
+        name_(name) {
+    LOG(debug, "A new allocator {} is born on device {}", name_, backend_->getDeviceId());
+  }
 
   TensorAllocator(Ptr<Backend> backend, Ptr<Device> device)
       : backend_(backend),
-        allocator_(New<Allocator>(backend_->getDeviceId(), device, 0, GROW, ALIGN)) {}
+        allocator_(New<Allocator>(backend_->getDeviceId(), device, 0, GROW, ALIGN)),
+        name_("unknown") {
+    LOG(debug, "A new allocator {} is born on device {}", name_, backend_->getDeviceId());
+  }
 
   ~TensorAllocator() { clear(); }
 
@@ -49,7 +56,7 @@ class TensorAllocator {
     for(auto part : bytes)
       total += allocator_->alignedSize(part);
     reserveExact(total);
-  }
+  }
 
   void reserveExact(size_t bytes = 0) {
     size_t mbytes = bytes / MBYTE;
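
This file carries the change the rest of the commit feeds: every TensorAllocator now stores a name, defaulting to "no_name" ("unknown" for the Device overload), and announces itself on construction. A minimal sketch of the pattern, assuming a plain int device id and std::cerr in place of LOG(debug, ...):

#include <iostream>
#include <string>

class NamedAllocator {  // hypothetical stand-in for Marian's TensorAllocator
  int deviceId_;
  std::string name_;
public:
  NamedAllocator(int deviceId, std::string name = "no_name")
      : deviceId_(deviceId), name_(std::move(name)) {
    std::cerr << "A new allocator " << name_
              << " is born on device " << deviceId_ << "\n";
  }
};

int main() {
  NamedAllocator a(0, "tensors");  // tagged at the call site
  NamedAllocator b(1);             // falls back to the default "no_name"
}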

src/training/communicator.h

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ class DefaultCommunicator : public ICommunicator {
     for(auto graph : graphs_) {
       int __size__ = std::min(shardSize, totalSize);
 
-      auto paramsAlloc = New<TensorAllocator>(graph->getBackend());
+      auto paramsAlloc = New<TensorAllocator>(graph->getBackend(), "DefaultCommunicator::lazyInit");
       paramsAllocs_.push_back(paramsAlloc);
 
       paramsAlloc->reserveExact(__size__ * sizeof(float));

src/translator/translator.h

Lines changed: 1 addition & 0 deletions
@@ -110,6 +110,7 @@ class Translate : public ModelTask {
     }
 
     for(auto scorer : scorers) {
+      LOG(debug, "Calling scorer->init(graph)");
       scorer->init(graph);
       if(shortlistGenerator_)
         scorer->setShortlistGenerator(shortlistGenerator_);
