Skip to content

Commit 88ed58c

Browse files
committed
Data generation logic
This data all fits into memory in one run and takes less than a minute to load into Snowflake, so it will be completely reset nightly. Overview of the (maximum) granularity of each table: one time: Databases, Tables; daily: DatabaseStorageUsageHistory, StorageUsage; hourly: LoadHistory, LoginHistory; per minute: QueryHistory.
1 parent cc4f9ab commit 88ed58c

File tree

10 files changed

+743
-1
lines changed

10 files changed

+743
-1
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import DataRow from "./DataRow";
2+
import { TYPES } from "./Types";
3+
import { randomFromArray } from "../helpers";
4+
5+
/**
 * Error thrown by DatabaseStorageUsageHistory when initialize() has not been
 * called before constructing a row, or when it is called a second time.
 *
 * The `name` property is set explicitly so that logs, stack traces and
 * `err.name` checks report "InitializeError" instead of the generic "Error".
 */
class InitializeError extends Error {
  constructor(...args) {
    super(...args);
    this.name = "InitializeError";
  }
}
6+
7+
/**
8+
* DATABASE_STORAGE_USAGE_HISTORY View
9+
*
10+
* https://docs.snowflake.net/manuals/sql-reference/account-usage/database_storage_usage_history.html
11+
*/
12+
export class DatabaseStorageUsageHistory extends DataRow {
  /**
   * Builds one storage-usage row for a database at a given date.
   *
   * @param {number} databaseID - Stored as DATABASE_ID.
   * @param {string} databaseName - Stored as DATABASE_NAME; also the key used
   *   to look up the baseline size recorded by initialize().
   * @param {Date} date - Any object exposing toISOString(); the result is
   *   stored as USAGE_DATE.
   * @throws {InitializeError} When the class has no baseline sizes yet, i.e.
   *   initialize() was never called.
   */
  constructor(databaseID, databaseName, date) {
    super();
    const rowClass = this.constructor;
    if (!rowClass._sizes) {
      throw new InitializeError(
        `Must call ${rowClass.name}.initialize() first`
      );
    }
    this.USAGE_DATE = date.toISOString();
    this.DATABASE_ID = databaseID;
    this.DATABASE_NAME = databaseName;
    this._setAverageDatabaseBytes();
    this._setAverageFailsafeBytes();
  }

  /**
   * Sets AVERAGE_DATABASE_BYTES from the baseline stored for DATABASE_NAME.
   * Roughly one row in ten is nudged up or down (even odds) by 5%-20% of the
   * baseline; the stored baseline itself is never modified.
   *
   * NOTE(review): a DATABASE_NAME that was not passed to initialize() has no
   * baseline, so the value ends up undefined or NaN - confirm callers only
   * use initialized names.
   */
  _setAverageDatabaseBytes() {
    const baseline = this.constructor._sizes[this.DATABASE_NAME];
    if (Math.random() >= 0.1) {
      this.AVERAGE_DATABASE_BYTES = baseline;
      return;
    }
    const fraction = Math.random() * (0.2 - 0.05) + 0.05;
    const delta = Math.round(baseline * fraction);
    this.AVERAGE_DATABASE_BYTES =
      Math.random() < 0.5 ? baseline - delta : baseline + delta;
  }

  /**
   * Sets AVERAGE_FAILSAFE_BYTES: zero about 90% of the time, otherwise a
   * rounded random share of AVERAGE_DATABASE_BYTES.
   */
  _setAverageFailsafeBytes() {
    const hasFailsafe = Math.random() < 0.1;
    this.AVERAGE_FAILSAFE_BYTES = hasFailsafe
      ? Math.round(Math.random() * this.AVERAGE_DATABASE_BYTES)
      : 0;
  }

  /**
   * Assigns every database a random baseline size, keyed by DATABASE_NAME.
   * Each baseline is a rounded random fraction of a magnitude picked from
   * 1e12, 1e13, 1e14 or 1e15. Must run exactly once before rows are created.
   *
   * @param {Iterable<{DATABASE_NAME: string}>} dbs - Databases to size.
   * @throws {InitializeError} When baselines have already been assigned.
   */
  static initialize(dbs) {
    if (this._sizes) {
      throw new InitializeError(
        `Must call ${this.name}.initialize() only once`
      );
    }
    const magnitudes = [1e12, 1e13, 1e14, 1e15];
    const baselines = {};
    for (const db of dbs) {
      const magnitude = randomFromArray(magnitudes);
      baselines[db.DATABASE_NAME] = Math.round(Math.random() * magnitude);
    }
    this._sizes = baselines;
  }

  /**
   * Column descriptors for this view, in column order.
   *
   * @returns {Array<{name: string, type: *}>} One entry per column.
   */
  static types() {
    const columns = [
      ["USAGE_DATE", TYPES.timestamp],
      ["DATABASE_ID", TYPES.integer],
      ["DATABASE_NAME", TYPES.string],
      ["AVERAGE_DATABASE_BYTES", TYPES.integer],
      ["AVERAGE_FAILSAFE_BYTES", TYPES.integer]
    ];
    return columns.map(([name, type]) => ({ name, type }));
  }
}

data_rows/Databases.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import DataRow from "./DataRow";
2+
import { TYPES } from "./Types";
3+
4+
/**
5+
* DATABASES View
6+
*
7+
* https://docs.snowflake.net/manuals/sql-reference/account-usage/databases.html
8+
*/
9+
export class Databases extends DataRow {
  /**
   * Builds one database row.
   *
   * @param {string} name - Stored as DATABASE_NAME.
   * @param {number} id - Stored as DATABASE_ID.
   */
  constructor(name, id) {
    super();
    Object.assign(this, { DATABASE_NAME: name, DATABASE_ID: id });
  }

  /**
   * Creates the fixed set of databases, numbering them from 1 in list order.
   *
   * @returns {Databases[]} One row per hard-coded database name.
   */
  static generate() {
    const databaseNames = [
      "squiggly_database",
      "jims_database",
      "jacksonbase",
      "prod",
      "staging_db",
      "dev1"
    ];
    const rows = [];
    let nextID = 1;
    for (const databaseName of databaseNames) {
      rows.push(new this(databaseName, nextID));
      nextID += 1;
    }
    return rows;
  }

  /**
   * Column descriptors for this view, in column order.
   *
   * @returns {Array<{name: string, type: *}>} One entry per column.
   */
  static types() {
    const columns = [
      ["DATABASE_NAME", TYPES.string],
      ["DATABASE_ID", TYPES.integer]
    ];
    return columns.map(([name, type]) => ({ name, type }));
  }
}

data_rows/LoadHistory.js

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import DataRow from "./DataRow";
2+
import { TYPES } from "./Types";
3+
4+
/**
5+
* LOAD_HISTORY View
6+
*
7+
* https://docs.snowflake.net/manuals/sql-reference/account-usage/load_history.html
8+
*/
9+
export class LoadHistory extends DataRow {
  /**
   * Builds one load-history row with a random row count (0-100) and, about
   * one time in ten, a random error count (0-10).
   *
   * @param {number} tableID - Stored as TABLE_ID.
   * @param {Date} date - Any object exposing toISOString(); the result is
   *   stored as LAST_LOAD_TIME.
   */
  constructor(tableID, date) {
    super();
    this.TABLE_ID = tableID;
    this.LAST_LOAD_TIME = date.toISOString();
    this.ROW_COUNT = Math.round(Math.random() * 100);
    if (Math.random() < 0.1) {
      this.ERROR_COUNT = Math.round(Math.random() * 10);
    } else {
      this.ERROR_COUNT = 0;
    }
  }

  /**
   * Constant odds value for this row type.
   * NOTE(review): presumably the per-time-step probability that the
   * generator emits a new row - confirm against the caller.
   *
   * @returns {number} Always 0.2.
   */
  static oddsNew() {
    const odds = 0.2;
    return odds;
  }

  /**
   * Column descriptors for this view, in column order.
   *
   * @returns {Array<{name: string, type: *}>} One entry per column.
   */
  static types() {
    const columns = [
      ["TABLE_ID", TYPES.integer],
      ["LAST_LOAD_TIME", TYPES.timestamp],
      ["ROW_COUNT", TYPES.integer],
      ["ERROR_COUNT", TYPES.integer]
    ];
    return columns.map(([name, type]) => ({ name, type }));
  }
}

data_rows/LoginHistory.js

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import DataRow from "./DataRow";
2+
import { TYPES } from "./Types";
3+
import * as helpers from "../helpers";
4+
5+
/**
6+
* LOGIN_HISTORY View
7+
*
8+
* https://docs.snowflake.net/manuals/sql-reference/account-usage/login_history.html
9+
*/
10+
export class LoginHistory extends DataRow {
  /**
   * Builds one login event for a randomly chosen user from getUsers().
   * About 90% of events are marked successful.
   *
   * @param {Date} date - Any object exposing toISOString(); the result is
   *   stored as EVENT_TIMESTAMP.
   */
  constructor(date) {
    super();
    this.EVENT_ID = helpers.getID();
    this.EVENT_TIMESTAMP = date.toISOString();
    const candidates = this.constructor.getUsers();
    const chosen = helpers.randomFromArray(candidates);
    this.USER_NAME = chosen.name;
    this.REPORTED_CLIENT_TYPE = chosen.driver;
    const succeeded = Math.random() < 0.9;
    this.IS_SUCCESS = succeeded ? "YES" : "NO";
  }

  /**
   * The fixed user roster. A fresh array of fresh objects is returned on
   * every call.
   *
   * @returns {Array<{name: string, driver: string, querySpeed: number}>}
   *   Users with their client driver and a query-speed multiplier.
   */
  static getUsers() {
    const roster = [
      ["WEB_CLIENT", "JAVASCRIPT_DRIVER", 1.2],
      ["BOB", "OTHER", 2.0],
      ["BI_APP", "JDBC_DRIVER", 0.8],
      ["JANE", "SNOWFLAKE_UI", 1.0]
    ];
    return roster.map(([name, driver, querySpeed]) => ({
      name,
      driver,
      querySpeed
    }));
  }

  /**
   * Column descriptors for this view, in column order.
   *
   * @returns {Array<{name: string, type: *}>} One entry per column.
   */
  static types() {
    const columns = [
      ["EVENT_ID", TYPES.string],
      ["EVENT_TIMESTAMP", TYPES.timestamp],
      ["USER_NAME", TYPES.string],
      ["REPORTED_CLIENT_TYPE", TYPES.string],
      ["IS_SUCCESS", TYPES.string]
    ];
    return columns.map(([name, type]) => ({ name, type }));
  }
}

data_rows/QueryHistory.js

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
import uuid4 from "uuid/v4";
2+
3+
import DataRow from "./DataRow";
4+
import { LoginHistory } from "./LoginHistory";
5+
import { TYPES } from "./Types";
6+
import * as helpers from "../helpers";
7+
8+
/**
9+
* QUERY_HISTORY View
10+
*
11+
* https://docs.snowflake.net/manuals/sql-reference/account-usage/query_history.html
12+
*/
13+
export class QueryHistory extends DataRow {
  /**
   * Builds one query-history row with a fresh UUID and randomly chosen query
   * text, query type, user, warehouse, status and timings.
   *
   * @param {string} databaseName - Stored as DATABASE_NAME.
   * @param {Date} date - Any object exposing toISOString(); the result is
   *   stored as START_TIME.
   */
  constructor(databaseName, date) {
    super();
    this.QUERY_ID = uuid4();
    this.setQueryText();
    this.DATABASE_NAME = databaseName;

    const chosenType = helpers.randomFromArray(this._getQueryTypes());
    this.QUERY_TYPE = chosenType.type;

    const chosenUser = helpers.randomFromArray(LoginHistory.getUsers());
    this.USER_NAME = chosenUser.name;

    const warehouses = [
      ["BIG_WH", "X-Large"],
      ["SMALL_WH", "Small"]
    ];
    [this.WAREHOUSE_NAME, this.WAREHOUSE_SIZE] = helpers.randomFromArray(
      warehouses
    );

    this.setExecutionStatus();
    this.START_TIME = date.toISOString();
    this.COMPILATION_TIME = Math.round(Math.random() * 1000);
    this.setExecutionTime(chosenUser.querySpeed, chosenType.querySpeed);
    this.QUEUED_REPAIR_TIME = 0;

    // About one query in five spends a random 0-1000 queued on overload.
    if (Math.random() < 0.2) {
      this.QUEUED_OVERLOAD_TIME = Math.round(Math.random() * 1000);
    } else {
      this.QUEUED_OVERLOAD_TIME = 0;
    }

    this.setTransactionBlockedtime();
  }

  /**
   * Sets QUERY_TEXT to one of three canned statements, chosen at random.
   * The text is picked independently of QUERY_TYPE.
   */
  setQueryText() {
    this.QUERY_TEXT = helpers.randomFromArray([
      "SHOW USERS",
      "SHOW WAREHOUSES",
      "SELECT foo FROM bar"
    ]);
  }

  /**
   * The available query types, each with a speed multiplier that feeds
   * setExecutionTime(). A fresh array is returned on every call.
   *
   * @returns {Array<{type: string, querySpeed: number}>} Query type entries.
   */
  _getQueryTypes() {
    const catalogue = [
      ["WITH", 1.2],
      ["REPLACE", 0.5],
      ["SHOW", 0.2],
      ["CREATE", 0.1],
      ["COPY", 1.5],
      ["SELECT", 1.9],
      ["UNKNOWN", 2.2]
    ];
    return catalogue.map(([type, querySpeed]) => ({ type, querySpeed }));
  }

  /**
   * Sets EXECUTION_STATUS to whatever helpers.randomFromArrayByWeight()
   * returns for the weighted status list.
   *
   * NOTE(review): the weights add up to 2.17 rather than 1, and the helper's
   * return shape is not visible here - confirm it normalizes the weights and
   * returns the status name, not the whole { weight, name } entry.
   */
  setExecutionStatus() {
    const weighted = [
      [0.9, "SUCCESS"],
      [0.31, "RUNNING"],
      [0.28, "QUEUED"],
      [0.24, "BLOCKED"],
      [0.19, "RESUMING_WAREHOUSE"],
      [0.15, "FAILED_WITH_ERROR"],
      [0.1, "FAILED_WITH_INCIDENT"]
    ].map(([weight, name]) => ({ weight, name }));
    this.EXECUTION_STATUS = helpers.randomFromArrayByWeight(weighted);
  }

  /**
   * Sets EXECUTION_TIME to a rounded random value between 0 and the product
   * of the warehouse, user and query-type multipliers times 1000.
   * Requires WAREHOUSE_NAME to be set already.
   *
   * @param {number} userQuerySpeed - Multiplier of the user running the query.
   * @param {number} queryTypeSpeed - Multiplier of the query type.
   */
  setExecutionTime(userQuerySpeed, queryTypeSpeed) {
    const speedByWarehouse = {
      BIG_WH: 0.5,
      SMALL_WH: 1.5
    };
    const warehouseSpeed = speedByWarehouse[this.WAREHOUSE_NAME];
    const upperBound =
      warehouseSpeed * userQuerySpeed * queryTypeSpeed * 1000;
    this.EXECUTION_TIME = Math.round(Math.random() * upperBound);
  }

  /**
   * Sets TRANSACTION_BLOCKED_TIME: zero about 95% of the time, otherwise a
   * rounded random value between 0 and 1000.
   */
  setTransactionBlockedtime() {
    const wasBlocked = Math.random() < 0.05;
    this.TRANSACTION_BLOCKED_TIME = wasBlocked
      ? Math.round(Math.random() * 1000)
      : 0;
  }

  /**
   * Constant odds value for this row type.
   * NOTE(review): presumably the per-time-step probability that the
   * generator emits a new row - confirm against the caller.
   *
   * @returns {number} Always 0.3.
   */
  static oddsNew() {
    const odds = 0.3;
    return odds;
  }

  /**
   * Column descriptors for this view, in column order.
   *
   * @returns {Array<{name: string, type: *}>} One entry per column.
   */
  static types() {
    const columns = [
      ["QUERY_ID", TYPES.string],
      ["QUERY_TEXT", TYPES.string],
      ["DATABASE_NAME", TYPES.string],
      ["QUERY_TYPE", TYPES.string],
      ["USER_NAME", TYPES.string],
      ["WAREHOUSE_NAME", TYPES.string],
      ["WAREHOUSE_SIZE", TYPES.string],
      ["EXECUTION_STATUS", TYPES.string],
      ["START_TIME", TYPES.timestamp],
      ["COMPILATION_TIME", TYPES.integer],
      ["EXECUTION_TIME", TYPES.integer],
      ["QUEUED_REPAIR_TIME", TYPES.integer],
      ["QUEUED_OVERLOAD_TIME", TYPES.integer],
      ["TRANSACTION_BLOCKED_TIME", TYPES.integer]
    ];
    return columns.map(([name, type]) => ({ name, type }));
  }
}

0 commit comments

Comments
 (0)