Skip to content

Commit 1e01e26

Browse files
committed
WIP
1 parent 13c1141 commit 1e01e26

File tree

8 files changed

+1059
-1945
lines changed

8 files changed

+1059
-1945
lines changed

examples/usecases/SIGIR_dataset.ipynb

Lines changed: 0 additions & 1945 deletions
This file was deleted.

examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb

Lines changed: 731 additions & 0 deletions
Large diffs are not rendered by default.

merlin/datasets/ecommerce/sigir/__init__.py

Whitespace-only changes.

merlin/datasets/ecommerce/sigir/browsing_train/__init__.py

Whitespace-only changes.
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
{
2+
"feature": [
3+
{
4+
"name": "session_id_hash",
5+
"type": "INT",
6+
"intDomain": {
7+
"name": "session_id_hash",
8+
"max": "999",
9+
"isCategorical": true
10+
},
11+
"annotation": {
12+
"tag": [
13+
"categorical",
14+
"id",
15+
"item"
16+
],
17+
"extraMetadata": [
18+
{
19+
"num_buckets": null,
20+
"freq_threshold": 0.0,
21+
"max_size": 1000.0,
22+
"cat_path": ".//categories/unique.session_id_hash.parquet",
23+
"embedding_sizes": {
24+
"cardinality": 1000.0,
25+
"dimension": 77.0
26+
},
27+
"_dims": [
28+
[
29+
0.0,
30+
null
31+
]
32+
],
33+
"is_list": false,
34+
"is_ragged": false,
35+
"dtype_item_size": 64.0
36+
}
37+
]
38+
}
39+
},
40+
{
41+
"name": "event_type",
42+
"type": "INT",
43+
"intDomain": {
44+
"name": "event_type",
45+
"max": "4",
46+
"isCategorical": true
47+
},
48+
"annotation": {
49+
"tag": [
50+
"categorical"
51+
],
52+
"extraMetadata": [
53+
{
54+
"num_buckets": null,
55+
"freq_threshold": 0.0,
56+
"max_size": 1000.0,
57+
"cat_path": ".//categories/unique.event_type.parquet",
58+
"embedding_sizes": {
59+
"cardinality": 5.0,
60+
"dimension": 16.0
61+
},
62+
"_dims": [
63+
[
64+
0.0,
65+
null
66+
]
67+
],
68+
"is_list": false,
69+
"is_ragged": false,
70+
"dtype_item_size": 64.0
71+
}
72+
]
73+
}
74+
},
75+
{
76+
"name": "product_action",
77+
"type": "INT",
78+
"intDomain": {
79+
"name": "product_action",
80+
"max": "6",
81+
"isCategorical": true
82+
},
83+
"annotation": {
84+
"tag": [
85+
"categorical"
86+
],
87+
"extraMetadata": [
88+
{
89+
"num_buckets": null,
90+
"freq_threshold": 0.0,
91+
"max_size": 1000.0,
92+
"cat_path": ".//categories/unique.product_action.parquet",
93+
"embedding_sizes": {
94+
"cardinality": 7.0,
95+
"dimension": 16.0
96+
},
97+
"_dims": [
98+
[
99+
0.0,
100+
null
101+
]
102+
],
103+
"is_list": false,
104+
"is_ragged": false,
105+
"dtype_item_size": 64.0
106+
}
107+
]
108+
}
109+
},
110+
{
111+
"name": "product_sku_hash",
112+
"type": "INT",
113+
"intDomain": {
114+
"name": "product_sku_hash",
115+
"max": "999",
116+
"isCategorical": true
117+
},
118+
"annotation": {
119+
"tag": [
120+
"categorical"
121+
],
122+
"extraMetadata": [
123+
{
124+
"num_buckets": null,
125+
"freq_threshold": 0.0,
126+
"max_size": 1000.0,
127+
"cat_path": ".//categories/unique.product_sku_hash.parquet",
128+
"embedding_sizes": {
129+
"cardinality": 1000.0,
130+
"dimension": 77.0
131+
},
132+
"_dims": [
133+
[
134+
0.0,
135+
null
136+
]
137+
],
138+
"is_list": false,
139+
"is_ragged": false,
140+
"dtype_item_size": 64.0
141+
}
142+
]
143+
}
144+
},
145+
{
146+
"name": "hashed_url",
147+
"type": "INT",
148+
"intDomain": {
149+
"name": "hashed_url",
150+
"max": "999",
151+
"isCategorical": true
152+
},
153+
"annotation": {
154+
"tag": [
155+
"categorical"
156+
],
157+
"extraMetadata": [
158+
{
159+
"num_buckets": null,
160+
"freq_threshold": 0.0,
161+
"max_size": 1000.0,
162+
"cat_path": ".//categories/unique.hashed_url.parquet",
163+
"embedding_sizes": {
164+
"cardinality": 1000.0,
165+
"dimension": 77.0
166+
},
167+
"_dims": [
168+
[
169+
0.0,
170+
null
171+
]
172+
],
173+
"is_list": false,
174+
"is_ragged": false,
175+
"dtype_item_size": 64.0
176+
}
177+
]
178+
}
179+
},
180+
{
181+
"name": "server_timestamp_epoch_ms",
182+
"type": "FLOAT",
183+
"annotation": {
184+
"tag": [
185+
"continuous"
186+
],
187+
"extraMetadata": [
188+
{
189+
"_dims": [
190+
[
191+
0.0,
192+
null
193+
]
194+
],
195+
"is_list": false,
196+
"is_ragged": false,
197+
"dtype_item_size": 64.0
198+
}
199+
]
200+
}
201+
}
202+
]
203+
}

merlin/datasets/ecommerce/sigir/sku_information/__init__.py

Whitespace-only changes.
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
{
2+
"feature": [
3+
{
4+
"name": "product_sku_hash",
5+
"type": "INT",
6+
"intDomain": {
7+
"name": "product_sku_hash",
8+
"max": "999",
9+
"isCategorical": true
10+
},
11+
"annotation": {
12+
"tag": [
13+
"id",
14+
"categorical",
15+
"item"
16+
],
17+
"extraMetadata": [
18+
{
19+
"num_buckets": null,
20+
"freq_threshold": 0.0,
21+
"max_size": 1000.0,
22+
"cat_path": ".//categories/unique.product_sku_hash.parquet",
23+
"embedding_sizes": {
24+
"cardinality": 1000.0,
25+
"dimension": 77.0
26+
},
27+
"_dims": [
28+
[
29+
0.0,
30+
null
31+
]
32+
],
33+
"is_list": false,
34+
"is_ragged": false,
35+
"dtype_item_size": 64.0
36+
}
37+
]
38+
}
39+
},
40+
{
41+
"name": "description_vector",
42+
"type": "FLOAT",
43+
"floatDomain": {
44+
"min": -0.44,
45+
"max": 0.603
46+
},
47+
"annotation": {
48+
"tag": [
49+
"item"
50+
],
51+
"extraMetadata": [
52+
{
53+
"_dims": [
54+
[
55+
0.0,
56+
null
57+
],
58+
[
59+
50,
60+
50
61+
]
62+
],
63+
"is_list": true,
64+
"is_ragged": true,
65+
"dtype_item_size": 64.0
66+
}
67+
]
68+
}
69+
},
70+
{
71+
"name": "image_vector",
72+
"type": "FLOAT",
73+
"floatDomain": {
74+
"min": -426.2960265063624,
75+
"max": 757.3242762232064
76+
},
77+
"annotation": {
78+
"tag": [
79+
"item"
80+
],
81+
"extraMetadata": [
82+
{
83+
"_dims": [
84+
[
85+
0.0,
86+
null
87+
],
88+
[
89+
66386,
90+
66386
91+
]
92+
],
93+
"is_list": true,
94+
"is_ragged": true,
95+
"dtype_item_size": 64.0
96+
}
97+
]
98+
}
99+
},
100+
{
101+
"name": "price_bucket",
102+
"type": "FLOAT",
103+
"annotation": {
104+
"tag": [
105+
"continuous"
106+
],
107+
"extraMetadata": [
108+
{
109+
"_dims": [
110+
[
111+
0.0,
112+
null
113+
]
114+
],
115+
"is_list": false,
116+
"is_ragged": false,
117+
"dtype_item_size": 64.0
118+
}
119+
]
120+
}
121+
}
122+
]
123+
}

merlin/datasets/synthetic.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
"booking.com": HERE / "ecommerce/booking/transformed/",
5050
"booking.com-raw": HERE / "ecommerce/booking/raw/",
5151
"transactions": HERE / "ecommerce/transactions",
52+
"sigir-browsing": HERE / "ecommerce/sigir/browsing_train",
53+
"sigir-sku": HERE / "ecommerce/sigir/sku_information",
5254
}
5355

5456

0 commit comments

Comments
 (0)