Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,10 @@ def process_stats(stats: WCTensorStatistic, subset_size: int) -> tuple[Tensor, T

# Prevent high memory and time consumption by sampling
if X_full.shape[sample_axis] > subset_size:
# Activations were reduced across all but the last dimension
lens = [reduce(mul, shape[:-1], 1) for shape in stats.shape_values]
step = X_full.shape[sample_axis] // subset_size
sorted_idxs = [i[0] for i in sorted(enumerate(lens), key=lambda x: -x[1])][::step]
idxs = [idx for idx in sorted_idxs if idx < X_full.shape[sample_axis]][:subset_size]

# Create index slices for all dimensions except the last one
# This works for both 2D and 3D (and theoretically any dimensionality)
index_slices = [slice(None)] * (len(X_full.shape) - 1) + [idxs]
X = X_full[tuple(index_slices)]
idxs = [i[0] for i in sorted(enumerate(lens), key=lambda x: -x[1])][::step]
X = X_full[..., idxs]
else:
X = X_full

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,20 +240,25 @@ def get_moe_model_for_test_scale_estimation() -> TModel:

@staticmethod
@abstractmethod
def get_moe_scale_estimation_ref() -> TTensor:
def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow: bool) -> TTensor:
"""
:param check_sampling_activation_stats_flow: Whether the test exercises the flow that applies
sampling while processing activation statistics.
:return: The reference output of calculate_quantization_params for the MoE model.
"""

@staticmethod
@abstractmethod
def get_scale_estimation_ref() -> TTensor:
def get_scale_estimation_ref(check_sampling_activation_stats_flow: bool) -> TTensor:
"""
:param check_sampling_activation_stats_flow: Whether the test exercises the flow that applies
sampling while processing activation statistics.
:return: The reference output of calculate_quantization_params of ScaleEstimation.
"""

@pytest.mark.parametrize("is_moe", [False, True])
def test_scale_estimation(self, mocker, is_moe):
@pytest.mark.parametrize("check_sampling_activation_stats_flow", [False, True])
def test_scale_estimation(self, mocker, is_moe, check_sampling_activation_stats_flow):
"""Checks that scales match the reference."""
calc_q_params_spy = mocker.spy(ScaleEstimation, "calculate_quantization_params")

Expand All @@ -264,9 +269,15 @@ def test_scale_estimation(self, mocker, is_moe):
model = self.get_model_for_test_scale_estimation()
input = np.arange(0, 4 * 8, dtype=np.float32).reshape(1, 4, 8)

# prepare dataset with one input tensor
# prepare dataset of size subset_size with input tensors
subset_size = 2 if check_sampling_activation_stats_flow else 1
# When checking the sampling flow, make the scale estimation (SE) subset size smaller than
# the subset size used for statistics collection. This exercises the optimized statistics
# processing flow, which samples only a few data points, in
# nncf/quantization/algorithms/weight_compression/activation_stats.py
se_subset_size = subset_size // 2 if check_sampling_activation_stats_flow else subset_size
input = self.to_tensor(input)
dataset = Dataset([input], self.get_transform_func())

dataset = Dataset([input + i for i in range(subset_size)], self.get_transform_func())

with SpyWeightCompressionStatisticsContext(mocker):
_ = compress_weights(
Expand All @@ -277,15 +288,18 @@ def test_scale_estimation(self, mocker, is_moe):
scale_estimation=True,
all_layers=True,
dataset=dataset,
subset_size=subset_size,
advanced_parameters=nncf.AdvancedCompressionParameters(
scale_estimation_params=nncf.AdvancedScaleEstimationParameters(subset_size=se_subset_size)
),
)

computed_scale = calc_q_params_spy.spy_return[0]

if is_moe:
reference = self.get_moe_scale_estimation_ref()
reference = self.get_moe_scale_estimation_ref(check_sampling_activation_stats_flow)
else:
reference = self.get_scale_estimation_ref()

reference = self.get_scale_estimation_ref(check_sampling_activation_stats_flow)
assert fns.allclose(Tensor(reference), computed_scale)

@staticmethod
Expand Down
196 changes: 134 additions & 62 deletions tests/onnx/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,78 +484,150 @@ def get_moe_model_for_test_scale_estimation() -> onnx.ModelProto:
return mb.build(opset_version=21)

@staticmethod
def get_scale_estimation_ref():
return np.array(
[
[[0.473328]],
[[0.929023]],
[[1.446527]],
[[1.920595]],
[[2.517053]],
[[3.030101]],
[[3.584278]],
[[4.04351]],
[[4.620007]],
[[5.165322]],
[[5.710637]],
[[6.122580]],
[[6.655914]],
[[7.237173]],
[[7.722581]],
[[8.255914]],
]
).T
def get_scale_estimation_ref(check_sampling_activation_stats_flow):
return (
np.array(
[
[[0.473328]],
[[0.929023]],
[[1.446527]],
[[1.920595]],
[[2.517054]],
[[3.030102]],
[[3.584279]],
[[4.043509]],
[[4.620008]],
[[5.165322]],
[[5.710637]],
[[6.122581]],
[[6.655914]],
[[7.237174]],
[[7.722580]],
[[8.255914]],
]
).T,
np.array(
[
[[0.47344488]],
[[0.9287766]],
[[1.4463282]],
[[1.920052]],
[[2.5167778]],
[[3.02987]],
[[3.5842714]],
[[4.0429296]],
[[4.619769]],
[[5.165224]],
[[5.7106786]],
[[6.121212]],
[[6.654546]],
[[7.2366524]],
[[7.7212124]],
[[8.254545]],
]
).T,
)[check_sampling_activation_stats_flow]

@staticmethod
def get_moe_scale_estimation_ref():
return np.array(
[
def get_moe_scale_estimation_ref(check_sampling_activation_stats_flow):
return (
np.array(
[
[
[
7.573249,
7.4666667,
7.4666667,
7.4666667,
7.4666667,
7.260152,
7.4666667,
7.4666667,
7.4666667,
7.4666667,
7.3082952,
7.846745,
7.223278,
7.271495,
7.420518,
7.4666667,
[
7.5732,
7.4667,
7.4667,
7.4667,
7.4667,
7.2602,
7.4667,
7.4667,
7.4667,
7.4667,
7.3083,
7.8467,
7.2233,
7.2715,
7.4205,
7.4667,
]
]
],
[
[
[
14.8205,
14.9032,
14.9858,
15.0685,
15.1512,
14.3400,
14.4173,
14.4945,
14.5718,
14.6491,
14.7264,
14.8037,
14.8810,
14.9583,
15.0355,
15.1128,
]
]
]
],
],
]
),
np.array(
[
[
[
14.820505,
14.903171,
14.985837,
15.068501,
15.151169,
14.339979,
14.417264,
14.494548,
14.571833,
14.649117,
14.726402,
14.803687,
14.880971,
14.958257,
15.035541,
15.112826,
[
7.575118,
7.4666667,
7.4666667,
7.4666667,
7.4666667,
7.254837,
7.4666667,
7.4666667,
7.4666667,
7.4666667,
7.495066,
7.850108,
7.219489,
7.2685375,
7.418597,
7.4666667,
]
]
]
],
]
)
],
[
[
[
14.820066,
14.902746,
14.985427,
15.068108,
15.150787,
14.3391285,
14.416424,
14.493721,
14.571016,
14.648311,
14.725608,
14.802904,
14.8801985,
14.957496,
15.034791,
15.112087,
]
]
],
]
),
)[check_sampling_activation_stats_flow]

@staticmethod
def get_orig_weight(model: onnx.ModelProto) -> Tensor:
Expand Down
Loading