@@ -33,8 +33,8 @@ static void fixedPointQuantization(Tensor data, Tensor res, int numCenters, floa
 }
 
 /* simulate a log-based quantization for values in data. The quantized value will be in the form of
- * S*2^q For example:
- * data = [0.9, 0.7, 0.5, 0.2 , 1.1]
+ * S*2^q For example:
+ * data = [0.9, 0.7, 0.5, 0.2 , 1.1]
  * res = [1, 0.5, 0.5, 0.25, 1 ]
  *
  * @param data contains the original data.
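As a side note for reviewers: the mapping in the comment above is "snap each value to the nearest power of two". A minimal standalone sketch of that idea (the function name `logQuantize` and the fixed scale `S = 1` are illustrative assumptions, not part of this patch):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Log-based quantization sketch: each value becomes S * 2^q with
// q = round(log2(|x| / S)). With S = 1 this reproduces the example above:
// [0.9, 0.7, 0.5, 0.2, 1.1] -> [1, 0.5, 0.5, 0.25, 1].
// Zero handling is omitted for brevity (log2(0) is -inf).
std::vector<float> logQuantize(const std::vector<float>& data, float S = 1.0f) {
  std::vector<float> res;
  for(float x : data) {
    float q = std::round(std::log2(std::fabs(x) / S));
    res.push_back(std::copysign(S * std::pow(2.0f, q), x));  // keep the sign
  }
  return res;
}

int main() {
  for(float y : logQuantize({0.9f, 0.7f, 0.5f, 0.2f, 1.1f}))
    std::printf("%g ", y);  // prints: 1 0.5 0.5 0.25 1
}
```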
@@ -86,7 +86,7 @@ void ModelQuantizer::quantize(Ptr<ExpressionGraph> graph) {
   LOG(info, "Quantizing the model to {}-bits", bits_);
 
   int numElements = (int)graph->params()->vals()->size();
-  auto allocator = New<TensorAllocator>(graph->getBackend());
+  auto allocator = New<TensorAllocator>(graph->getBackend(), "ModelQuantizer::quantize");
   allocator->reserveExact(graph->params()->vals()->memory()->size());
   allocator->allocate(errorResidual_, {1, numElements});
 
@@ -99,7 +99,7 @@ void ModelQuantizer::quantize(Ptr<ExpressionGraph> graph) {
   {
     // apply error feedback mechanism
     using namespace functional;
-    Element(_1 += _2, graph->params()->vals(), errorResidual_); // add the previous error residual to the current model
+    Element(_1 += _2, graph->params()->vals(), errorResidual_);  // add the previous error residual to the current model
     errorResidual_->copyFrom(graph->params()->vals());  // set the model as the error-residual (will be updated below)
   }
 
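The pair of calls above is the error-feedback trick: the quantization error left over from the previous call is folded back into the parameters before quantizing again, so errors do not accumulate silently. A scalar sketch on flat arrays, assuming (per the "will be updated below" comment) that the residual is later set to original minus quantized further down in `quantize()`; `quantizeWithErrorFeedback` and `quantizeOne` are hypothetical names:

```cpp
#include <cstddef>
#include <vector>

// Error feedback, sketched with plain arrays instead of Marian tensors.
// 'quantizeOne' stands in for whichever per-value quantizer is configured.
void quantizeWithErrorFeedback(std::vector<float>& params,
                               std::vector<float>& residual,
                               float (*quantizeOne)(float)) {
  for(std::size_t i = 0; i < params.size(); ++i) {
    params[i] += residual[i];           // Element(_1 += _2, params, residual)
    residual[i] = params[i];            // residual <- pre-quantization model
    params[i] = quantizeOne(params[i]); // compress the parameter
    residual[i] -= params[i];           // residual <- what quantization lost
  }
}
```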
@@ -127,7 +127,7 @@ void ModelQuantizer::quantize(Ptr<ExpressionGraph> graph) {
 void ModelQuantizer::quantizeImpl(Tensor t) {
   if(!tempVar_) {
     // init the swap tensor
-    auto allocator = New<TensorAllocator>(t->getBackend());
+    auto allocator = New<TensorAllocator>(t->getBackend(), "ModelQuantizer::quantizeImpl");
     allocator->reserveExact(sizeof(float));
     allocator->allocate(tempVar_, {1, 1});
     allocators_.push_back(allocator);
@@ -136,12 +136,12 @@ void ModelQuantizer::quantizeImpl(Tensor t) {
   // init additional tensor for scaling optimization
   if(!delta_ && optSteps_ > 0) {
     int msize = (int)errorResidual_->size();
-    auto allocator = New<TensorAllocator>(errorResidual_->getBackend());
+    auto allocator = New<TensorAllocator>(errorResidual_->getBackend(), "ModelQuantizer::quantizeImpl");
     allocator->reserveExact(msize * sizeof(float));
     allocator->allocate(delta_, {1, msize});
     allocators_.push_back(allocator);
   }
-
+
   Tensor tflat = t->subtensor(0, t->size());  // flatten t for reduce
 
   float S = 0.0f;  // scaling factor S
@@ -155,7 +155,7 @@ void ModelQuantizer::quantizeImpl(Tensor t) {
   // optimize the scaling factor S
   for(int i = 0; i < optSteps_; i++) {
     Tensor q = delta_->subtensor(0, t->size());  // to store the quantized t
-
+
     // let t be the original tensor, and q be the quantized tensor, and q = S*a where S is the
     // scaling factor. we want to optimize S to minimize MSE(S*a - t) therefore, S =
     // sum(a*t)/sum(a*a) see https://www.aclweb.org/anthology/2020.ngt-1.4.pdf for more details.
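The closed form quoted in that comment follows from setting the derivative of the squared error to zero: d/dS of sum_i (S*a_i - t_i)^2 is 2 * sum_i a_i*(S*a_i - t_i) = 0, hence S = sum(a*t) / sum(a*a). A loop-based sketch of the same reduction (the code in this file does it with tensor reductions; the helper below is only illustrative):

```cpp
#include <cstddef>
#include <vector>

// Least-squares scale for a fixed quantization pattern a:
// minimizing sum_i (S*a[i] - t[i])^2 over S yields S = sum(a*t) / sum(a*a).
float optimalScale(const std::vector<float>& a, const std::vector<float>& t) {
  float num = 0.0f, den = 0.0f;
  for(std::size_t i = 0; i < a.size(); ++i) {
    num += a[i] * t[i];
    den += a[i] * a[i];
  }
  return den > 0.0f ? num / den : 0.0f;  // guard against an all-zero pattern
}
```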