Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16,781 changes: 16,781 additions & 0 deletions examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb

Large diffs are not rendered by default.

Empty file.
Empty file.
209 changes: 209 additions & 0 deletions merlin/datasets/ecommerce/sigir/browsing_train/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
{
"feature": [
{
"name": "session_id_hash",
"type": "INT",
"intDomain": {
"name": "session_id_hash",
"max": "999",
"isCategorical": true
},
"annotation": {
"tag": [
"item_id",
"item",
"categorical",
"id"
],
"extraMetadata": [
{
"num_buckets": null,
"freq_threshold": 0.0,
"max_size": 1000.0,
"start_index": 0.0,
"cat_path": ".//categories/unique.session_id_hash.parquet",
"embedding_sizes": {
"cardinality": 1000.0,
"dimension": 77.0
},
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "event_type",
"type": "INT",
"intDomain": {
"name": "event_type",
"max": "2",
"isCategorical": true
},
"annotation": {
"tag": [
"categorical"
],
"extraMetadata": [
{
"num_buckets": null,
"freq_threshold": 0.0,
"max_size": 1000.0,
"start_index": 0.0,
"cat_path": ".//categories/unique.event_type.parquet",
"embedding_sizes": {
"cardinality": 3.0,
"dimension": 16.0
},
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "product_action",
"type": "INT",
"intDomain": {
"name": "product_action",
"max": "4",
"isCategorical": true
},
"annotation": {
"tag": [
"categorical"
],
"extraMetadata": [
{
"num_buckets": null,
"freq_threshold": 0.0,
"max_size": 1000.0,
"start_index": 0.0,
"cat_path": ".//categories/unique.product_action.parquet",
"embedding_sizes": {
"cardinality": 5.0,
"dimension": 16.0
},
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "product_sku_hash",
"type": "INT",
"intDomain": {
"name": "product_sku_hash",
"max": "999",
"isCategorical": true
},
"annotation": {
"tag": [
"categorical"
],
"extraMetadata": [
{
"num_buckets": null,
"freq_threshold": 0.0,
"max_size": 1000.0,
"start_index": 0.0,
"cat_path": ".//categories/unique.product_sku_hash.parquet",
"embedding_sizes": {
"cardinality": 1000.0,
"dimension": 77.0
},
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "hashed_url",
"type": "INT",
"intDomain": {
"name": "hashed_url",
"max": "999",
"isCategorical": true
},
"annotation": {
"tag": [
"categorical"
],
"extraMetadata": [
{
"num_buckets": null,
"freq_threshold": 0.0,
"max_size": 1000.0,
"start_index": 0.0,
"cat_path": ".//categories/unique.hashed_url.parquet",
"embedding_sizes": {
"cardinality": 1000.0,
"dimension": 77.0
},
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "server_timestamp_epoch_ms",
"type": "FLOAT",
"annotation": {
"tag": [
"continuous"
],
"extraMetadata": [
{
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
}
]
}
Empty file.
132 changes: 132 additions & 0 deletions merlin/datasets/ecommerce/sigir/sku_information/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
{
"feature": [
{
"name": "product_sku_hash",
"type": "INT",
"intDomain": {
"name": "product_sku_hash",
"max": "999",
"isCategorical": true
},
"annotation": {
"tag": [
"id",
"categorical",
"item"
],
"extraMetadata": [
{
"num_buckets": null,
"freq_threshold": 0.0,
"max_size": 1000.0,
"cat_path": ".//categories/unique.product_sku_hash.parquet",
"embedding_sizes": {
"cardinality": 1000.0,
"dimension": 77.0
},
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "description_vector",
"type": "FLOAT",
"floatDomain": {
"min": -0.44,
"max": 0.603
},
"annotation": {
"tag": [
"item"
],
"extraMetadata": [
{
"_dims": [
[
0.0,
null
],
[
50,
50
]
],
"is_list": true,
"is_ragged": true,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "category_hash",
"type": "INT",
"intDomain": {
"name": "category_hash",
"max": "174",
"isCategorical": true
},
"annotation": {
"tag": [
"item",
"item_id",
"categorical",
"id"
],
"extraMetadata": [
{
"num_buckets": null,
"freq_threshold": 0.0,
"max_size": 1000.0,
"start_index": 0.0,
"cat_path": ".//categories/unique.category_hash.parquet",
"embedding_sizes": {
"cardinality": 175.0,
"dimension": 29.0
},
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
},
{
"name": "price_bucket",
"type": "FLOAT",
"annotation": {
"tag": [
"continuous"
],
"extraMetadata": [
{
"_dims": [
[
0.0,
null
]
],
"is_list": false,
"is_ragged": false,
"dtype_item_size": 64.0
}
]
}
}
]
}
2 changes: 2 additions & 0 deletions merlin/datasets/synthetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
"booking.com": HERE / "ecommerce/booking/transformed/",
"booking.com-raw": HERE / "ecommerce/booking/raw/",
"transactions": HERE / "ecommerce/transactions",
"sigir-browsing": HERE / "ecommerce/sigir/browsing_train",
"sigir-sku": HERE / "ecommerce/sigir/sku_information",
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import shutil

import pytest
from testbook import testbook

from tests.conftest import REPO_ROOT

# Skip every test in this module when the optional `transformers` package
# cannot be imported.
pytest.importorskip("transformers")
# Same guard for the Triton helpers; on success the imported module is kept so
# the test below can call `utils.run_triton_server`.
utils = pytest.importorskip("merlin.systems.triton.utils")

# Location of the `tritonserver` executable on PATH, or None when it is not
# installed; used by the `skipif` marker on the test below.
TRITON_SERVER_PATH = shutil.which("tritonserver")


@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
@testbook(
    REPO_ROOT
    / "examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb",
    timeout=720,
    execute=False,
)
@pytest.mark.notebook
def test_next_item_prediction(tb, tmpdir):
    """Run the next-item-prediction notebook end to end against Triton.

    The notebook is loaded without being executed (``execute=False``) so the
    test can first inject environment overrides, then run it in two phases:
    cells before index 48 run without a server, and the remaining cells run
    while a Triton server is serving ``<tmpdir>/ensemble`` on gRPC port 8001.

    Parameters
    ----------
    tb
        Notebook client supplied by the ``testbook`` decorator.
    tmpdir
        pytest temporary-directory fixture; used as the notebook's output
        directory and as the parent of the served ``ensemble`` folder.
    """
    # Index of the first cell that must run with the Triton server up.
    # NOTE(review): presumably the earlier cells prepare data, train and export
    # the ensemble -- confirm against the notebook if the cell count changes.
    first_serving_cell = 48

    # Environment overrides read by the notebook. The injected source is kept
    # flush-left so the text handed to the kernel is exactly as written.
    tb.inject(
        f"""
import os, random
os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}"
os.environ["NUM_EPOCHS"] = "1"
os.environ["NUM_EXAMPLES"] = "1_500"
os.environ["MINIMUM_SESSION_LENGTH"] = "2"
"""
    )

    # Phase 1: everything that does not need a running server.
    tb.execute_cell(list(range(first_serving_cell)))

    # Phase 2: the remaining cells, executed while Triton serves the ensemble.
    ensemble_dir = f"{tmpdir}/ensemble"
    with utils.run_triton_server(ensemble_dir, grpc_port=8001):
        remaining_cells = list(range(first_serving_cell, len(tb.cells)))
        tb.execute_cell(remaining_cells)

    # Pull the prediction out of the notebook kernel and range-check it.
    prediction = tb.ref("predicted_hashed_url_id").item()
    assert 0 <= prediction <= 1002