Skip to content

Commit f5bf0ed

Browse files
committed
test: Add mock data
1 parent 24b8c72 commit f5bf0ed

File tree

1 file changed

+374
-0
lines changed

1 file changed

+374
-0
lines changed

mock/create-mock.py

Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
import os
2+
import psycopg2
3+
from psycopg2 import sql
4+
import csv
5+
import random
6+
from datetime import datetime, timedelta
7+
8+
# Digit codes for the days of the week (0: Mon, 1: Tue, ..., 6: Sun).
WEEK_DAYS = [str(day_index) for day_index in range(7)]


# Build a random weekday string.
def generate_random_days():
    """Return a random selection of weekday digits as one ascending string.

    Between one and seven distinct entries of ``WEEK_DAYS`` are drawn, so a
    result such as ``"012"`` stands for Monday, Tuesday and Wednesday.
    """
    how_many = random.randint(1, 7)
    picked = random.sample(WEEK_DAYS, how_many)
    # Sorting keeps the digits in weekday order regardless of draw order.
    return "".join(sorted(picked))
18+
19+
20+
# Database connection settings.
def connect_to_db():
    """Open and return a psycopg2 connection to the local test database."""
    settings = {
        "dbname": "taskie_test_db",
        "user": "testuser",
        "password": "testpass",
        "host": "127.0.0.1",
        "port": "9000",
    }
    return psycopg2.connect(**settings)
30+
31+
32+
# Generate large amounts of mock data as CSV files.
def generate_csv_for_user(file_name, row_count):
    """Write ``row_count`` sequentially numbered mock users to ``file_name``.

    The CSV starts with a header row and carries no ``id`` or ``created_at``
    column. Row ``n`` (counting from 1) holds ``user_n``, ``pass_n``,
    ``user_n@example.com``, ``profile_n.png`` and ``nick_n``.
    """
    header = ["username", "password", "email", "profile_image", "nickname"]

    with open(file_name, mode="w", newline="") as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)
        csv_out.writerows(
            [
                f"user_{n}",
                f"pass_{n}",
                f"user_{n}@example.com",
                f"profile_{n}.png",
                f"nick_{n}",
            ]
            for n in range(1, row_count + 1)
        )

    print(f"Generated {row_count} rows of data into {file_name}.")
57+
58+
59+
def generate_csv_for_habit(file_name, row_count, user_count):
    """Write ``row_count`` randomised mock habit rows to ``file_name`` as CSV.

    Each row gets the title ``Habit <n>``, three independently drawn minute
    values in 0..1440, a random weekday string, a random ``activated`` flag,
    a ``user_id`` drawn from 1..``user_count`` and the current time as both
    ``created_at`` and ``updated_at``. The file has no ``id`` column.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S"
    header = [
        "title",
        "end_time_minutes",
        "start_time_minutes",
        "repeat_days",
        "repeat_time_minutes",
        "activated",
        "user_id",
        "created_at",
        "updated_at",
    ]

    with open(file_name, mode="w", newline="") as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)

        for n in range(1, row_count + 1):
            # The random draws stay in this order so seeded runs are stable.
            end_minutes = random.randint(0, 1440)
            start_minutes = random.randint(0, 1440)
            weekdays = generate_random_days()
            repeat_minutes = random.randint(0, 1440)
            is_active = random.choice([True, False])
            owner_id = random.randint(1, user_count)
            created = datetime.now().strftime(stamp_format)
            updated = datetime.now().strftime(stamp_format)

            csv_out.writerow(
                [
                    f"Habit {n}",
                    end_minutes,
                    start_minutes,
                    weekdays,
                    repeat_minutes,
                    is_active,
                    owner_id,
                    created,
                    updated,
                ]
            )

    print(f"Generated {row_count} rows of data into {file_name}.")
101+
102+
103+
def generate_csv_for_habit_log(file_name, row_count, habit_count):
    """Write ``row_count`` mock habit-log rows to ``file_name`` as CSV.

    Each row pairs a ``completed_at`` timestamp lying 0..365 whole days
    before the current time with a ``habit_id`` drawn from
    1..``habit_count``. The file has no ``id`` column.
    """
    with open(file_name, mode="w", newline="") as out:
        csv_out = csv.writer(out)
        csv_out.writerow(["completed_at", "habit_id"])

        for _ in range(row_count):
            days_ago = timedelta(days=random.randint(0, 365))
            finished = datetime.now() - days_ago
            target_habit = random.randint(1, habit_count)
            csv_out.writerow(
                [finished.strftime("%Y-%m-%d %H:%M:%S"), target_habit]
            )

    print(f"Generated {row_count} rows of data into {file_name}.")
116+
117+
118+
def generate_csv_for_routine(file_name, row_count, user_count):
    """Write ``row_count`` randomised mock routine rows to ``file_name``.

    Each row gets the title ``Routine <n>``, a start minute in 0..1440, a
    random weekday string, a ``user_id`` drawn from 1..``user_count`` and
    the current time as both ``created_at`` and ``updated_at``. The file has
    no ``id`` column.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S"
    header = [
        "title",
        "start_time_minutes",
        "repeat_days",
        "user_id",
        "created_at",
        "updated_at",
    ]

    with open(file_name, mode="w", newline="") as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)

        for n in range(1, row_count + 1):
            # The random draws stay in this order so seeded runs are stable.
            start_minutes = random.randint(0, 1440)
            weekdays = generate_random_days()
            owner_id = random.randint(1, user_count)
            created = datetime.now().strftime(stamp_format)
            updated = datetime.now().strftime(stamp_format)

            csv_out.writerow(
                [
                    f"Routine {n}",
                    start_minutes,
                    weekdays,
                    owner_id,
                    created,
                    updated,
                ]
            )

    print(f"Generated {row_count} rows of data into {file_name}.")
151+
152+
153+
def generate_csv_for_routine_element(
    file_name, row_count, routine_count, user_count
):
    """Write ``row_count`` mock routine-element rows to ``file_name`` as CSV.

    Each row gets the title ``Routine Element <n>``, an ``order`` in 1..10,
    a ``duration_minutes`` in 1..120, a ``routine_id`` drawn from
    1..``routine_count``, the current time as both ``created_at`` and
    ``updated_at``, and a ``user_id`` drawn from 1..``user_count``. The file
    has no ``id`` column.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S"
    header = [
        "title",
        "order",
        "duration_minutes",
        "routine_id",
        "created_at",
        "updated_at",
        "user_id",
    ]

    with open(file_name, mode="w", newline="") as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)

        for n in range(1, row_count + 1):
            # The random draws stay in this order so seeded runs are stable.
            position = random.randint(1, 10)
            minutes = random.randint(1, 120)
            parent_routine = random.randint(1, routine_count)
            created = datetime.now().strftime(stamp_format)
            updated = datetime.now().strftime(stamp_format)
            owner_id = random.randint(1, user_count)

            csv_out.writerow(
                [
                    f"Routine Element {n}",
                    position,
                    minutes,
                    parent_routine,
                    created,
                    updated,
                    owner_id,
                ]
            )

    print(f"Generated {row_count} rows of data into {file_name}.")
192+
193+
194+
def prepare_file(file_name):
    """Delete ``file_name`` if it already exists, then announce the file."""
    if os.path.exists(file_name):
        os.remove(file_name)
    print(f"Preparing file: {file_name}")


def generate_csv_for_routine_log(
    file_name, row_count, routine_count, routine_element_count
):
    """Write ``row_count`` mock routine-log rows to ``file_name`` as CSV.

    Each row holds a ``duration_seconds`` in 30..3600, a ``completed_at``
    timestamp lying 0..365 whole days before the current time, a random
    ``is_skipped`` flag, a ``routine_element_id`` drawn from
    1..``routine_element_count`` and a ``routine_id`` drawn from
    1..``routine_count``. The file has no ``id`` column.

    Args:
        file_name: Path of the CSV file to (re)create.
        row_count: Number of data rows to write after the header.
        routine_count: Upper bound (inclusive) for generated routine ids.
        routine_element_count: Upper bound (inclusive) for generated
            routine element ids.
    """
    prepare_file(file_name)

    # The header must list the columns in the same order the rows are
    # written. Rows put routine_element_id before routine_id, which is also
    # the column order insert_large_data passes to COPY for this file.
    header = [
        "duration_seconds",
        "completed_at",
        "is_skipped",
        "routine_element_id",
        "routine_id",
    ]

    with open(file_name, mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(header)

        for _ in range(row_count):
            duration_seconds = random.randint(30, 3600)
            completed_at = (
                datetime.now() - timedelta(days=random.randint(0, 365))
            ).strftime("%Y-%m-%d %H:%M:%S")
            is_skipped = random.choice([True, False])
            routine_id = random.randint(1, routine_count)
            routine_element_id = random.randint(1, routine_element_count)
            writer.writerow(
                [
                    duration_seconds,
                    completed_at,
                    is_skipped,
                    routine_element_id,
                    routine_id,
                ]
            )

    print(f"Generated {row_count} rows of data into {file_name}.")
236+
237+
238+
# Bulk-insert data into PostgreSQL from a CSV file.
def copy_from_csv(conn, table_name, file_name, columns):
    """Load a CSV file into ``table_name`` with ``COPY ... FROM STDIN``.

    The table and column names are quoted as SQL identifiers. The statement
    uses ``WITH CSV HEADER``, so the file is expected to start with a header
    line. The load is committed on success.

    Args:
        conn: Open psycopg2 connection.
        table_name: Name of the target table.
        file_name: Path of the CSV file to load.
        columns: Target column names, in the order the CSV fields appear.

    Raises:
        Exception: Anything raised while opening the file, copying or
            committing is re-raised after the transaction is rolled back.
    """
    copy_statement = sql.SQL(
        """
        COPY {} ({}) FROM STDIN WITH CSV HEADER
        """
    ).format(
        sql.Identifier(table_name),
        sql.SQL(", ").join(map(sql.Identifier, columns)),
    )

    cursor = conn.cursor()
    try:
        with open(file_name, "r") as file:
            cursor.copy_expert(copy_statement, file)
        conn.commit()
    except Exception:
        # Leave the connection usable for the caller instead of stuck in an
        # aborted transaction.
        conn.rollback()
        raise
    finally:
        # Close the cursor on the failure path too.
        cursor.close()
255+
256+
257+
# Bulk-insert mock data into the database tables.
def insert_large_data():
    """Generate a mock CSV file per table and COPY each one into the database.

    For every table the CSV is written under ``mock/`` and then loaded
    through ``copy_from_csv``. Tables whose rows carry user, habit or routine
    ids are generated after the table those ids point at. The generators
    draw ids from 1..count, which presumably matches ids the database
    assigns to freshly loaded rows - TODO confirm against the schema.

    The connection is closed even when a generation or load step raises.
    """
    conn = connect_to_db()

    try:
        # Row counts for each table's CSV file.
        user_count = 1000000
        habit_count = 1000000
        habit_log_count = 500000
        routine_count = 5000000
        routine_element_count = 200000
        routine_log_count = 100000

        # Generate and insert user data.
        generate_csv_for_user("mock/user_data.csv", user_count)
        copy_from_csv(
            conn,
            "user",
            "mock/user_data.csv",
            ["username", "password", "email", "profile_image", "nickname"],
        )

        # Generate and insert habit data.
        generate_csv_for_habit("mock/habit_data.csv", habit_count, user_count)
        copy_from_csv(
            conn,
            "habit",
            "mock/habit_data.csv",
            [
                "title",
                "end_time_minutes",
                "start_time_minutes",
                "repeat_days",
                "repeat_time_minutes",
                "activated",
                "user_id",
                "created_at",
                "updated_at",
            ],
        )

        # Generate and insert habit log data.
        generate_csv_for_habit_log(
            "mock/habit_log_data.csv", habit_log_count, habit_count
        )
        copy_from_csv(
            conn,
            "habit_log",
            "mock/habit_log_data.csv",
            ["completed_at", "habit_id"],
        )

        # Generate and insert routine data.
        generate_csv_for_routine(
            "mock/routine_data.csv", routine_count, user_count
        )
        copy_from_csv(
            conn,
            "routine",
            "mock/routine_data.csv",
            [
                "title",
                "start_time_minutes",
                "repeat_days",
                "user_id",
                "created_at",
                "updated_at",
            ],
        )

        # Generate and insert routine element data.
        generate_csv_for_routine_element(
            "mock/routine_element_data.csv",
            routine_element_count,
            routine_count,
            user_count,
        )
        copy_from_csv(
            conn,
            "routine_element",
            "mock/routine_element_data.csv",
            [
                "title",
                "order",
                "duration_minutes",
                "routine_id",
                "created_at",
                "updated_at",
                "user_id",
            ],
        )

        # Generate and insert routine log data.
        generate_csv_for_routine_log(
            "mock/routine_log_data.csv",
            routine_log_count,
            routine_count,
            routine_element_count,
        )
        copy_from_csv(
            conn,
            "routine_log",
            "mock/routine_log_data.csv",
            [
                "duration_seconds",
                "completed_at",
                "is_skipped",
                "routine_element_id",
                "routine_id",
            ],
        )
    finally:
        # Close the connection whether or not every step succeeded.
        conn.close()

    print("Data inserted successfully.")


if __name__ == "__main__":
    insert_large_data()

0 commit comments

Comments
 (0)