Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,10 @@ def process_stats(stats: WCTensorStatistic, subset_size: int) -> tuple[Tensor, T

# Prevent high memory and time consumption by sampling
if X_full.shape[sample_axis] > subset_size:
# Activations were reduced across all but the last dimension
lens = [reduce(mul, shape[:-1], 1) for shape in stats.shape_values]
step = X_full.shape[sample_axis] // subset_size
sorted_idxs = [i[0] for i in sorted(enumerate(lens), key=lambda x: -x[1])][::step]
idxs = [idx for idx in sorted_idxs if idx < X_full.shape[sample_axis]][:subset_size]

# Create index slices for all dimensions except the last one
# This works for both 2D and 3D (and theoretically any dimensionality)
index_slices = [slice(None)] * (len(X_full.shape) - 1) + [idxs]
X = X_full[tuple(index_slices)]
idxs = [i[0] for i in sorted(enumerate(lens), key=lambda x: -x[1])][::step]
X = X_full[..., idxs]
else:
X = X_full

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import math
from abc import ABC
from abc import abstractmethod
from functools import reduce
from operator import mul
from typing import Any, Callable, Optional, TypeVar
from unittest.mock import patch

Expand All @@ -28,6 +30,8 @@
from nncf.quantization import compress_weights
from nncf.quantization.advanced_parameters import AdvancedAWQParameters as AWQParams
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters as CompressionParams
from nncf.quantization.algorithms.weight_compression.activation_stats import WCTensorStatistic
from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats
from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression
from nncf.quantization.algorithms.weight_compression.awq import AWQ
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
Expand Down Expand Up @@ -240,20 +244,25 @@ def get_moe_model_for_test_scale_estimation() -> TModel:

@staticmethod
@abstractmethod
def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow: bool = False) -> TTensor:
    """
    Returns the reference output of calculate_quantization_params for MoE model.

    :param check_sampling_activation_stats_flow: Whether we are checking the flow with sampling when
        processing activation statistics. Defaults to False so that implementations and callers
        written against the former zero-argument signature remain valid.
    :return: The reference tensor for the selected flow.
    """

@staticmethod
@abstractmethod
def get_scale_estimation_ref(check_sampling_activation_stats_flow: bool = False) -> TTensor:
    """
    Returns the reference output of calculate_quantization_params of ScaleEstimation.

    :param check_sampling_activation_stats_flow: Whether we are checking the flow with sampling when
        processing activation statistics. Defaults to False so that implementations and callers
        written against the former zero-argument signature remain valid.
    :return: The reference tensor for the selected flow.
    """

@pytest.mark.parametrize("is_moe", [False, True])
def test_scale_estimation(self, mocker, is_moe):
@pytest.mark.parametrize("check_sampling_activation_stats_flow", [False, True])
def test_scale_estimation(self, mocker, is_moe, check_sampling_activation_stats_flow):
"""Checks that scales match the reference."""
calc_q_params_spy = mocker.spy(ScaleEstimation, "calculate_quantization_params")

Expand All @@ -264,9 +273,15 @@ def test_scale_estimation(self, mocker, is_moe):
model = self.get_model_for_test_scale_estimation()
input = np.arange(0, 4 * 8, dtype=np.float32).reshape(1, 4, 8)

# prepare dataset with one input tensor
# prepare dataset of size subset_size with input tensors
subset_size = 2 if check_sampling_activation_stats_flow else 1
# When check_sampling_activation_stats_flow is set, make the subset size for scale estimation (SE)
# smaller than the subset size used for statistics collection. This exercises the optimized
# statistics processing flow, which samples only a few data points, in
# nncf/quantization/algorithms/weight_compression/activation_stats.py
se_subset_size = subset_size // 2 if check_sampling_activation_stats_flow else subset_size
input = self.to_tensor(input)
dataset = Dataset([input], self.get_transform_func())

dataset = Dataset([input + i for i in range(subset_size)], self.get_transform_func())

with SpyWeightCompressionStatisticsContext(mocker):
_ = compress_weights(
Expand All @@ -277,15 +292,18 @@ def test_scale_estimation(self, mocker, is_moe):
scale_estimation=True,
all_layers=True,
dataset=dataset,
subset_size=subset_size,
advanced_parameters=nncf.AdvancedCompressionParameters(
scale_estimation_params=nncf.AdvancedScaleEstimationParameters(subset_size=se_subset_size)
),
)

computed_scale = calc_q_params_spy.spy_return[0]

if is_moe:
reference = self.get_moe_scale_estimation_ref()
reference = self.get_moe_scale_estimation_ref(check_sampling_activation_stats_flow)
else:
reference = self.get_scale_estimation_ref()

reference = self.get_scale_estimation_ref(check_sampling_activation_stats_flow)
assert fns.allclose(Tensor(reference), computed_scale)

@staticmethod
Expand Down Expand Up @@ -643,3 +661,46 @@ def get_transform_func() -> Optional[Callable[..., Any]]:
@staticmethod
def get_reduction_axes() -> int:
return 1

@pytest.mark.parametrize(
    "mean_values_shape,num_samples,subset_size,expected_s_shape,expected_X_shape,expected_indices",
    [
        # 2D Activations
        ((8,), 10, 5, (8,), (8, 5), [0, 2, 4, 6, 8]),
        ((8,), 5, 10, (8,), (8, 5), [0, 1, 2, 3, 4]),
        ((8,), 12, 5, (8,), (8, 6), [0, 2, 4, 6, 8, 10]),
        # 3D Activations
        ((4, 8), 10, 5, (4, 8), (4, 8, 5), [0, 2, 4, 6, 8]),
        ((4, 8), 5, 10, (4, 8), (4, 8, 5), [0, 1, 2, 3, 4]),
        ((4, 8), 25, 8, (4, 8), (4, 8, 9), [0, 3, 6, 9, 12, 15, 18, 21, 24]),
    ],
)
def test_process_stats(
    self, mean_values_shape, num_samples, subset_size, expected_s_shape, expected_X_shape, expected_indices
):
    """
    Checks the shapes and contents of the tensors returned by process_stats.

    Every generated sample holds a distinct run of consecutive values, so a slice of X along its
    last axis identifies the sample it was taken from. The test verifies that slice ``i`` of X equals
    sample ``expected_indices[i]`` and that s is the maximum absolute value over all samples.
    """
    values_per_sample = reduce(mul, mean_values_shape, 1)

    # Sample k is filled with the values [k * n, (k + 1) * n), where n is the per-sample element count.
    mean_values = []
    for sample_no in range(num_samples):
        first_value = sample_no * values_per_sample
        flat_values = np.arange(first_value, first_value + values_per_sample, dtype=np.float32)
        mean_values.append(Tensor(flat_values.reshape(mean_values_shape)))
    shape_values = [(1, *mean_values_shape)] * num_samples

    s, X = process_stats(
        WCTensorStatistic(mean_values=mean_values, shape_values=shape_values),
        subset_size,
    )

    assert s.shape == expected_s_shape, f"Expected s shape {expected_s_shape}, got {s.shape}"
    assert X.shape == expected_X_shape, f"Expected X shape {expected_X_shape}, got {X.shape}"

    # Stack the samples along a new leading axis, then move that axis to the end so that the
    # sample index is the last dimension, as it is in X.
    stacked = fns.stack(mean_values)
    samples_last = fns.transpose(stacked, axes=[*range(1, len(stacked.shape)), 0])

    for position, sample_idx in enumerate(expected_indices):
        assert fns.all(X[..., position] == samples_last[..., sample_idx])

    assert fns.all(s == fns.max(fns.abs(samples_last), axis=-1))
196 changes: 134 additions & 62 deletions tests/onnx/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,78 +484,150 @@ def get_moe_model_for_test_scale_estimation() -> onnx.ModelProto:
return mb.build(opset_version=21)

@staticmethod
def get_scale_estimation_ref(check_sampling_activation_stats_flow=False):
    """
    Returns the reference output of calculate_quantization_params of ScaleEstimation.

    :param check_sampling_activation_stats_flow: Whether the reference for the flow with sampling
        when processing activation statistics is requested. Defaults to False so that calls
        without arguments keep working.
    :return: numpy array of shape (1, 1, 16) holding the reference values for the selected flow.
    """
    ref_without_sampling = [
        0.473328, 0.929023, 1.446527, 1.920595, 2.517054, 3.030102, 3.584279, 4.043509,
        4.620008, 5.165322, 5.710637, 6.122581, 6.655914, 7.237174, 7.722580, 8.255914,
    ]  # fmt: skip
    ref_with_sampling = [
        0.47344488, 0.9287766, 1.4463282, 1.920052, 2.5167778, 3.02987, 3.5842714, 4.0429296,
        4.619769, 5.165224, 5.7106786, 6.121212, 6.654546, 7.2366524, 7.7212124, 8.254545,
    ]  # fmt: skip

    # Only the requested reference is materialized; the result has the same (1, 1, 16) layout
    # as transposing a (16, 1, 1) array.
    selected = ref_with_sampling if check_sampling_activation_stats_flow else ref_without_sampling
    return np.array(selected).reshape(1, 1, -1)

@staticmethod
def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow=False):
    """
    Returns the reference output of calculate_quantization_params for MoE model.

    :param check_sampling_activation_stats_flow: Whether the reference for the flow with sampling
        when processing activation statistics is requested. Defaults to False so that calls
        without arguments keep working.
    :return: numpy array of shape (2, 1, 1, 16) holding the reference values for the selected flow.
    """
    ref_without_sampling = [
        [
            7.5732, 7.4667, 7.4667, 7.4667, 7.4667, 7.2602, 7.4667, 7.4667,
            7.4667, 7.4667, 7.3083, 7.8467, 7.2233, 7.2715, 7.4205, 7.4667,
        ],
        [
            14.8205, 14.9032, 14.9858, 15.0685, 15.1512, 14.3400, 14.4173, 14.4945,
            14.5718, 14.6491, 14.7264, 14.8037, 14.8810, 14.9583, 15.0355, 15.1128,
        ],
    ]  # fmt: skip
    ref_with_sampling = [
        [
            7.575118, 7.4666667, 7.4666667, 7.4666667, 7.4666667, 7.254837, 7.4666667, 7.4666667,
            7.4666667, 7.4666667, 7.495066, 7.850108, 7.219489, 7.2685375, 7.418597, 7.4666667,
        ],
        [
            14.820066, 14.902746, 14.985427, 15.068108, 15.150787, 14.3391285, 14.416424, 14.493721,
            14.571016, 14.648311, 14.725608, 14.802904, 14.8801985, 14.957496, 15.034791, 15.112087,
        ],
    ]  # fmt: skip

    # Only the requested reference is materialized. Each row of 16 values is wrapped in two
    # singleton dimensions, giving the (2, 1, 1, 16) layout.
    selected = ref_with_sampling if check_sampling_activation_stats_flow else ref_without_sampling
    return np.array([[[row]] for row in selected])

@staticmethod
def get_orig_weight(model: onnx.ModelProto) -> Tensor:
Expand Down
Loading