
Commit 8efbd36
Add dataloader pre-trained embeddings support to Merlin Models (#1083)
* Created PretrainedEmbeddings and changed other blocks to support pre-trained embeddings
* Using SequenceAggregator with pre-trained embeddings
* Added a test for aggregating sequences of pre-trained embeddings
* Fixed bugs in graph mode when using EmbeddingOperator, as the last dim was undefined
* Reduced the cardinality of the testing and sequence_testing fixtures from 51996 to 100, in order to speed up tests
* Changed the test of Transformers with pre-trained embeddings to use sequence_testing_data
* Fixed tests
* Fixed tests and added a test of pre-trained embeddings with masked language modeling
* Added tests with pre-trained embeddings for DLRM and DCN
* Fixed PopularityBasedSamplerV2, which raised an error when the sampled item equals the item-id cardinality
* Linting fix
* Fixed failing test
* Implemented Sara's suggestions on pre-trained embeddings
* Fixed test

Co-authored-by: edknv <[email protected]>
1 parent 0931633 commit 8efbd36
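For context, a minimal sketch of the workflow this commit enables: pre-trained vectors are joined onto batches by the dataloader's EmbeddingOperator (referenced in the new docstrings below) and consumed by the new PretrainedEmbeddings branch of InputBlockV2. This is a hedged sketch, not part of the commit: the file path, column names, and the "click" target are illustrative.

    import numpy as np
    import merlin.models.tf as mm
    from merlin.dataloader.ops.embeddings import EmbeddingOperator
    from merlin.io import Dataset

    # Hypothetical lookup table: row i holds the pre-trained embedding of item id i.
    item_embeddings = np.random.rand(101, 64)

    train = Dataset("train.parquet")  # hypothetical dataset
    loader = mm.Loader(
        train,
        batch_size=1024,
        transforms=[
            EmbeddingOperator(
                item_embeddings,
                lookup_key="item_id",
                embedding_name="pretrained_item_embeddings",
            )
        ],
    )

    # The operator tags the new feature with Tags.EMBEDDING, so InputBlockV2
    # (see merlin/models/tf/inputs/base.py below) routes it automatically.
    model = mm.Model(
        mm.InputBlockV2(loader.output_schema),
        mm.MLPBlock([64, 32]),
        mm.BinaryClassificationTask("click"),  # hypothetical target column
    )
    model.compile(optimizer="adam")
    model.fit(loader, epochs=1)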

File tree: 22 files changed, +963 -78 lines


merlin/datasets/entertainment/music_streaming/schema.json

Lines changed: 10 additions & 0 deletions

@@ -42,6 +42,16 @@
         "tag": [
           "categorical",
           "item"
+        ],
+        "extraMetadata": [
+          {
+            "_dims": [
+              [
+                0.0,
+                null
+              ]
+            ]
+          }
         ]
       }
     },
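A note on the new extraMetadata._dims entries here and in the schema files below (an informed reading, not spelled out in the commit): each inner pair appears to give (min, max) bounds for one dimension of the column's values, with [0.0, null] denoting a dimension of unbounded size, as merlin-core uses when serializing ragged-list shapes. A sketch of inspecting the parsed result, assuming merlin-core's TensorflowMetadata JSON loader:

    from merlin.schema.io.tensorflow_metadata import TensorflowMetadata

    # Hypothetical path to one of the schema files changed in this commit.
    json_text = open("schema.json").read()
    schema = TensorflowMetadata.from_json(json_text).to_merlin_schema()

    for col in schema:
        # Expect the parsed dims/value-count bounds, e.g. (0, None), in properties.
        print(col.name, col.properties)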

merlin/datasets/testing/schema.json

Lines changed: 13 additions & 4 deletions

@@ -6,7 +6,7 @@
       "intDomain": {
         "name": "user_id",
         "min": "1",
-        "max": "1797",
+        "max": "90",
         "isCategorical": true
       },
       "annotation": {
@@ -90,7 +90,6 @@
         "tag": [
           "continuous",
           "item"
-
         ]
       }
     },
@@ -100,14 +99,24 @@
       "intDomain": {
         "name": "item_id",
         "min": "1",
-        "max": "51996",
+        "max": "100",
         "isCategorical": true
       },
       "annotation": {
         "tag": [
           "item_id",
           "categorical",
           "item"
+        ],
+        "extraMetadata": [
+          {
+            "_dims": [
+              [
+                0.0,
+                null
+              ]
+            ]
+          }
         ]
       }
     },
@@ -121,7 +130,7 @@
       "intDomain": {
         "name": "categories",
         "min": "1",
-        "max": "331",
+        "max": "70",
         "isCategorical": true
       },
       "annotation": {

merlin/datasets/testing/sequence_testing/schema.json

Lines changed: 12 additions & 2 deletions

@@ -6,14 +6,24 @@
       "intDomain": {
         "name": "test_user_id",
         "min": "1",
-        "max": "1797",
+        "max": "90",
         "isCategorical": true
       },
       "annotation": {
         "tag": [
           "categorical",
           "user_id",
           "user"
+        ],
+        "extraMetadata": [
+          {
+            "_dims": [
+              [
+                0.0,
+                null
+              ]
+            ]
+          }
         ]
       }
     },
@@ -122,7 +132,7 @@
       "intDomain": {
         "name": "item_id_seq",
         "min": "1",
-        "max": "51996",
+        "max": "100",
         "isCategorical": true
       },
       "annotation": {

merlin/models/tf/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -58,6 +58,7 @@
     ConcatFeatures,
     ElementwiseSum,
     ElementwiseSumItemMulti,
+    SequenceAggregator,
     StackFeatures,
 )
 from merlin.models.tf.core.base import (
@@ -86,6 +87,7 @@
     Embeddings,
     EmbeddingTable,
     FeatureConfig,
+    PretrainedEmbeddings,
     SequenceEmbeddingFeatures,
     TableConfig,
 )
@@ -215,6 +217,7 @@
     "EmbeddingTable",
     "AverageEmbeddingsByWeightFeature",
     "Embeddings",
+    "PretrainedEmbeddings",
     "FeatureConfig",
     "TableConfig",
     "ParallelPredictionBlock",
@@ -236,6 +239,7 @@
     "Filter",
     "ParallelBlock",
     "StackFeatures",
+    "SequenceAggregator",
     "DotProductInteraction",
     "FMPairwiseInteraction",
     "FMBlock",

merlin/models/tf/blocks/mlp.py

Lines changed: 6 additions & 1 deletion

@@ -96,7 +96,12 @@ def MLPBlock(

     for idx, dim in enumerate(dimensions):
         dropout_layer = None
-        activation_idx = activation if isinstance(activation, str) else activation[idx]
+        activation = activation or "linear"
+        if isinstance(activation, str):
+            activation_idx = activation
+        else:
+            activation_idx = activation[idx]
+
         if no_activation_last_layer and idx == len(dimensions) - 1:
             activation_idx = "linear"
         else:
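The change above makes activation=None a valid argument by falling back to "linear", which the new PretrainedEmbeddings block relies on for its projection layers (see merlin/models/tf/inputs/embedding.py below). A quick sketch of both call styles:

    import merlin.models.tf as mm

    # Previously this raised on activation[idx]; now None means "linear".
    projection = mm.MLPBlock([64], activation=None)

    # A list still assigns one activation per layer, as before.
    mlp = mm.MLPBlock([128, 64], activation=["relu", "tanh"])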

merlin/models/tf/core/aggregation.py

Lines changed: 10 additions & 4 deletions

@@ -414,8 +414,10 @@ def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
                 outputs[k] = v
             return outputs
         else:
-            assert len(inputs.shape) == 3, "Tensor inputs should be 3-D"
-            return combiner(inputs, axis=self.axis, **kwargs)
+            if inputs.get_shape().rank > self.axis + 1:
+                return combiner(inputs, axis=self.axis, **kwargs)
+            else:
+                return inputs

     def parse_combiner(self, combiner):
         if isinstance(combiner, str):
@@ -441,8 +443,12 @@ def compute_output_shape(self, input_shape):
                 outputs[k] = v
             return outputs
         else:
-            batch_size, _, last_dim = input_shape
-            return batch_size, last_dim
+            if len(input_shape) > self.axis + 1:
+                return tf.TensorShape(
+                    list(input_shape)[: self.axis] + list(input_shape)[self.axis + 1 :]
+                )
+            else:
+                return input_shape

     def get_config(self):
         config = super().get_config()
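With the rank check replacing the hard 3-D assert, SequenceAggregator combines inputs that have a sequence dimension and passes anything else through unchanged. A minimal sketch, assuming the string-combiner parsing shown above and the default aggregation axis of 1:

    import tensorflow as tf
    import merlin.models.tf as mm

    agg = mm.SequenceAggregator("mean")

    seq = tf.random.uniform((8, 10, 64))  # (batch, seq length, embedding dim)
    print(agg(seq).shape)                 # (8, 64): combined over the sequence axis

    flat = tf.random.uniform((8, 64))     # no sequence dimension
    print(agg(flat).shape)                # (8, 64): returned as-is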

merlin/models/tf/inputs/base.py

Lines changed: 15 additions & 3 deletions

@@ -29,6 +29,7 @@
     EmbeddingFeatures,
     EmbeddingOptions,
     Embeddings,
+    PretrainedEmbeddings,
     SequenceEmbeddingFeatures,
 )
 from merlin.schema import Schema, Tags, TagsType
@@ -206,13 +207,15 @@ def InputBlock(
 INPUT_TAG_TO_BLOCK: Dict[Tags, Callable[[Schema], Layer]] = {
     Tags.CONTINUOUS: Continuous,
     Tags.CATEGORICAL: Embeddings,
+    Tags.EMBEDDING: PretrainedEmbeddings,
 }


 def InputBlockV2(
     schema: Optional[Schema] = None,
     categorical: Union[Tags, Layer] = Tags.CATEGORICAL,
     continuous: Union[Tags, Layer] = Tags.CONTINUOUS,
+    pretrained_embeddings: Union[Tags, Layer] = Tags.EMBEDDING,
     pre: Optional[BlockType] = None,
     post: Optional[BlockType] = None,
     aggregation: Optional[TabularAggregationType] = "concat",
@@ -262,11 +265,15 @@ def InputBlockV2(
     categorical : Union[Tags, Layer], defaults to `Tags.CATEGORICAL`
         A block or column-selector to use for categorical-features.
         If a column-selector is provided (either a schema or tags), the selector
-        will be passed to `Embeddings` to infer the embedding tables from the column-selector.
+        will be passed to `Embeddings()` to infer the embedding tables from the column-selector.
     continuous : Union[Tags, Layer], defaults to `Tags.CONTINUOUS`
         A block to use for continuous-features.
         If a column-selector is provided (either a schema or tags), the selector
-        will be passed to `Continuous` to infer the features from the column-selector.
+        will be passed to `Continuous()` to infer the features from the column-selector.
+    pretrained_embeddings : Union[Tags, Layer], defaults to `Tags.EMBEDDING`
+        A block to use for pre-trained embeddings.
+        If a column-selector is provided (either a schema or tags), the selector
+        will be passed to `PretrainedEmbeddings()` to infer the features from the column-selector.
     pre : Optional[BlockType], optional
         Transformation block to apply before the embeddings lookup, by default None
     post : Optional[BlockType], optional
@@ -297,7 +304,12 @@ def InputBlockV2(
         )
         categorical = branches["embeddings"]

-    unparsed = {"categorical": categorical, "continuous": continuous, **branches}
+    unparsed = {
+        "categorical": categorical,
+        "continuous": continuous,
+        "pretrained_embeddings": pretrained_embeddings,
+        **branches,
+    }
     parsed = {}
     for name, branch in unparsed.items():
         if isinstance(branch, Layer):
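With the new branch, InputBlockV2 routes Tags.EMBEDDING features alongside the categorical and continuous ones. A hedged sketch, assuming schema is the loader.output_schema from the first sketch above:

    import merlin.models.tf as mm
    from merlin.schema import Tags

    # Default behavior: Tags.EMBEDDING columns go to PretrainedEmbeddings(...).
    inputs = mm.InputBlockV2(schema)

    # Or configure the branch explicitly, e.g. to project and L2-normalize:
    inputs = mm.InputBlockV2(
        schema,
        pretrained_embeddings=mm.PretrainedEmbeddings(
            schema.select_by_tag(Tags.EMBEDDING),
            output_dims=64,
            sequence_combiner="mean",
            normalizer="l2-norm",
        ),
    )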

merlin/models/tf/inputs/embedding.py

Lines changed: 109 additions & 10 deletions

@@ -27,8 +27,9 @@
 import merlin.io
 from merlin.core.dispatch import DataFrameType
 from merlin.io import Dataset
-from merlin.models.tf.blocks.mlp import InitializerType, RegularizerType
-from merlin.models.tf.core.base import Block, BlockType
+from merlin.models.tf.blocks.mlp import InitializerType, MLPBlock, RegularizerType
+from merlin.models.tf.core.aggregation import SequenceAggregator
+from merlin.models.tf.core.base import Block, BlockType, NoOp, block_registry
 from merlin.models.tf.core.combinators import ParallelBlock, SequentialBlock
 from merlin.models.tf.core.tabular import (
     TABULAR_MODULE_PARAMS_DOCSTRING,
@@ -423,11 +424,7 @@ def _call_table(self, inputs, **kwargs):
         if inputs.shape.as_list()[-1] == 1:
             inputs = tf.squeeze(inputs, axis=-1)
         out = call_layer(self.table, inputs, **kwargs)
-        if len(out.get_shape()) > 2 and self.sequence_combiner is not None:
-            if isinstance(self.sequence_combiner, tf.keras.layers.Layer):
-                out = call_layer(self.sequence_combiner, out, **kwargs)
-            elif isinstance(self.sequence_combiner, str):
-                out = process_str_sequence_combiner(out, self.sequence_combiner, **kwargs)
+        out = process_sequence_combiner(out, self.sequence_combiner, **kwargs)

         if self.l2_batch_regularization_factor > 0:
             self.add_loss(self.l2_batch_regularization_factor * tf.reduce_sum(tf.square(out)))
@@ -625,6 +622,95 @@ def _get_dim(col, embedding_dims, infer_dim_fn):
     return dim


+def PretrainedEmbeddings(
+    schema: Schema,
+    output_dims: Optional[Union[Dict[str, int], int]] = None,
+    sequence_combiner: Optional[Union[CombinerType, Dict[str, CombinerType]]] = "mean",
+    normalizer: Union[str, tf.keras.layers.Layer] = None,
+    pre: Optional[BlockType] = None,
+    post: Optional[BlockType] = None,
+    aggregation: Optional[TabularAggregationType] = None,
+    block_name: str = "pretrained_embeddings",
+    **kwargs,
+) -> ParallelBlock:
+    """Creates a ParallelBlock with a branch for each pre-trained embedding feature
+    in the schema.
+
+    Parameters
+    ----------
+    schema: Schema
+        Schema of the input data, with the pre-trained embeddings.
+        You will typically pass schema.select_by_tag(Tags.EMBEDDING), as that is the tag
+        added to pre-trained embedding features when using the
+        merlin.dataloader.ops.embeddings.EmbeddingOperator
+    output_dims: Optional[Union[Dict[str, int], int]], optional
+        If provided, projects features to the specified dim(s).
+        If an int, all features are projected to that dim.
+        If a dict, only the features named in the dict are mapped to the specified dims,
+        for example {"feature_name": projection_dim, ...}. By default None
+    sequence_combiner: Optional[Union[str, tf.keras.layers.Layer]], optional
+        A string ("mean", "sum", "max", "min") or Layer specifying
+        how to combine the second dimension of
+        the pre-trained embeddings if it is 3D.
+        By default "mean"
+    normalizer: Union[str, tf.keras.layers.Layer], optional
+        A Layer (e.g. mm.L2Norm()) or string ("l2-norm") applied to the
+        pre-trained embeddings after projection and sequence combining.
+        Default is None (no normalization)
+    pre: Optional[BlockType], optional
+        Transformation block to apply before the embeddings lookup, by default None
+    post: Optional[BlockType], optional
+        Transformation block to apply after the embeddings lookup, by default None
+    aggregation: Optional[TabularAggregationType], optional
+        Transformation block to apply for aggregating the inputs, by default None
+    block_name: str, optional
+        Name of the block, by default "pretrained_embeddings"
+    Returns
+    -------
+    ParallelBlock
+        Returns a parallel block with a branch for each pre-trained embedding
+    """
+
+    tables = {}
+
+    for col in schema:
+        table_name = col.name
+
+        tables[table_name] = NoOp()
+
+        if output_dims:
+            new_dim = output_dims
+            if isinstance(output_dims, dict):
+                if table_name in output_dims:
+                    new_dim = output_dims[table_name]
+                else:
+                    new_dim = None
+            if new_dim:
+                tables[table_name] = MLPBlock([new_dim], activation=None)
+
+        if sequence_combiner:
+            if isinstance(sequence_combiner, str):
+                sequence_combiner = SequenceAggregator(sequence_combiner)
+
+            tables[table_name] = SequentialBlock([tables[table_name], sequence_combiner])
+
+        if normalizer:
+            normalizer = block_registry.parse(normalizer)
+            tables[table_name] = SequentialBlock([tables[table_name], normalizer])
+
+    return ParallelBlock(
+        tables,
+        pre=pre,
+        post=post,
+        aggregation=aggregation,
+        name=block_name,
+        schema=schema,
+        **kwargs,
+    )
+
+
 @tf.keras.utils.register_keras_serializable(package="merlin.models")
 class AverageEmbeddingsByWeightFeature(tf.keras.layers.Layer):
     def __init__(self, weight_feature_name: str, axis=1, **kwargs):
@@ -1215,17 +1301,28 @@ def serialize_feature_config(feature_config: FeatureConfig) -> Dict[str, Any]:
     return outputs


+def process_sequence_combiner(inputs, combiner, **kwargs):
+    result = inputs
+    if len(inputs.get_shape()) > 2 and combiner:
+        if isinstance(combiner, tf.keras.layers.Layer):
+            result = call_layer(combiner, inputs, **kwargs)
+        elif isinstance(combiner, str):
+            result = process_str_sequence_combiner(inputs, combiner, **kwargs)
+
+    return result
+
+
 def process_str_sequence_combiner(
     inputs: Union[tf.Tensor, tf.RaggedTensor], combiner: str, **kwargs
 ) -> tf.Tensor:
-    """Process inputs with str sequence combiners ("mean" or "sum")
+    """Process inputs with str sequence combiners ("mean", "sum" or "max")

     Parameters
     ----------
     inputs : Union[tf.Tensor, tf.RaggedTensor]
         Input 3D tensor (batch size, seq length, embedding dim)
     combiner : str
-        The combiner: "mean" or "sum"
+        The combiner: "mean", "sum" or "max"

     Returns
     -------
@@ -1238,9 +1335,11 @@ def process_str_sequence_combiner(
         combiner = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))
     elif combiner == "sum":
         combiner = tf.keras.layers.Lambda(lambda x: tf.reduce_sum(x, axis=1))
+    elif combiner == "max":
+        combiner = tf.keras.layers.Lambda(lambda x: tf.reduce_max(x, axis=1))
     else:
         raise ValueError(
-            "Only 'mean' and 'sum' str combiners is implemented for dense"
+            "Only 'mean', 'sum', and 'max' str combiners are implemented for dense"
             " list/multi-hot embedded features. You can also"
             " provide a tf.keras.layers.Layer instance as a sequence combiner."
         )
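Putting the pieces together: each pre-trained feature gets a NoOp branch by default, optionally wrapped in an MLPBlock projection, a SequenceAggregator, and a normalizer. A hedged usage sketch (schema as in the first sketch above; the feature name is illustrative), also exercising the newly supported "max" string combiner:

    import merlin.models.tf as mm
    from merlin.schema import Tags

    pretrained = mm.PretrainedEmbeddings(
        schema.select_by_tag(Tags.EMBEDDING),
        output_dims={"pretrained_item_embeddings": 32},  # project only this feature
        sequence_combiner="max",  # parsed into a SequenceAggregator over axis 1
        normalizer="l2-norm",
        aggregation="concat",
    )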
