@@ -260,6 +260,44 @@ def test_multiple_features(self) -> None:
         )
         self._test_ebc([eb1_config, eb2_config], features)
 
+    def test_multiple_kernels_per_ebc_table(self) -> None:
+        class TestModule(torch.nn.Module):
+            def __init__(self, m: torch.nn.Module) -> None:
+                super().__init__()
+                self.m = m
+
+        eb1_config = EmbeddingBagConfig(
+            name="t1", embedding_dim=16, num_embeddings=10, feature_names=["f1"]
+        )
+        eb2_config = EmbeddingBagConfig(
+            name="t2",
+            embedding_dim=16,
+            num_embeddings=10,
+            feature_names=["f2"],
+            use_virtual_table=True,
+        )
+        eb3_config = EmbeddingBagConfig(
+            name="t3", embedding_dim=16, num_embeddings=10, feature_names=["f3"]
+        )
+        ebc = EmbeddingBagCollection(tables=[eb1_config, eb2_config, eb3_config])
+        model = TestModule(ebc)
+        qebc = trec_infer.modules.quantize_embeddings(
+            model,
+            dtype=torch.int8,
+            inplace=True,
+            per_table_weight_dtype={"t1": torch.float16},
+        )
+        self.assertTrue(isinstance(qebc.m, QuantEmbeddingBagCollection))
+        # feature names should be consistent with the order of grouped embeddings
+        self.assertEqual(qebc.m._feature_names, ["f1", "f3", "f2"])
+
+        features = KeyedJaggedTensor(
+            keys=["f1", "f2", "f3"],
+            values=torch.as_tensor([0, 1, 2]),
+            lengths=torch.as_tensor([1, 1, 1]),
+        )
+        self._test_ebc([eb1_config, eb2_config, eb3_config], features)
+
     # pyre-ignore
     @given(
         data_type=st.sampled_from(
@@ -742,6 +780,93 @@ def __init__(self, m: torch.nn.Module) -> None:
                 self.assertEqual(config.name, "t2")
                 self.assertEqual(config.data_type, DataType.INT8)
 
+    def test_multiple_kernels_per_ec_table(self) -> None:
+        class TestModule(torch.nn.Module):
+            def __init__(self, m: torch.nn.Module) -> None:
+                super().__init__()
+                self.m = m
+
+        eb1_config = EmbeddingConfig(
+            name="t1", embedding_dim=16, num_embeddings=10, feature_names=["f1"]
+        )
+        eb2_config = EmbeddingConfig(
+            name="t2",
+            embedding_dim=16,
+            num_embeddings=10,
+            feature_names=["f2"],
+            use_virtual_table=True,
+        )
+        eb3_config = EmbeddingConfig(
+            name="t3",
+            embedding_dim=16,
+            num_embeddings=10,
+            feature_names=["f3"],
+        )
+        ec = EmbeddingCollection(tables=[eb1_config, eb2_config, eb3_config])
+        model = TestModule(ec)
+        qconfig_spec_keys: List[Type[torch.nn.Module]] = [EmbeddingCollection]
+        quant_mapping: Dict[Type[torch.nn.Module], Type[torch.nn.Module]] = {
+            EmbeddingCollection: QuantEmbeddingCollection
+        }
+        qec = trec_infer.modules.quantize_embeddings(
+            model,
+            dtype=torch.int8,
+            additional_qconfig_spec_keys=qconfig_spec_keys,
+            additional_mapping=quant_mapping,
+            inplace=True,
+            per_table_weight_dtype={
+                "t1": torch.float16,
+                "t2": torch.float16,
+                "t3": torch.float16,
+            },
+        )
+        self.assertTrue(isinstance(qec.m, QuantEmbeddingCollection))
+        # feature names should be consistent with the order of grouped embeddings
+        self.assertEqual(qec.m._feature_names, ["f1", "f3", "f2"])
+
+        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
+        configs = model.m.embedding_configs()
+        self.assertEqual(len(configs), 3)
+        features = KeyedJaggedTensor(
+            keys=["f1", "f2", "f3"],
+            values=torch.as_tensor(
+                [
+                    5,
+                    1,
+                    0,
+                    0,
+                    4,
+                    3,
+                    4,
+                    9,
+                    2,
+                    2,
+                    3,
+                    3,
+                    1,
+                    5,
+                    0,
+                    7,
+                    5,
+                    0,
+                    9,
+                    9,
+                    3,
+                    5,
+                    6,
+                    6,
+                    9,
+                    3,
+                    7,
+                    8,
+                    7,
+                    7,
+                ]
+            ),
+            lengths=torch.as_tensor([9, 12, 9]),
+        )
+        self._test_ec(tables=[eb3_config, eb1_config, eb2_config], features=features)
+
     def test_different_quantization_dtype_per_ebc_table(self) -> None:
         class TestModule(torch.nn.Module):
             def __init__(self, m: torch.nn.Module) -> None: