-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_prep.py
More file actions
98 lines (81 loc) · 3.46 KB
/
data_prep.py
File metadata and controls
98 lines (81 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import cv2
import glob
import scipy
import numpy as np
import pandas as pd
from scipy import io
from tqdm import tqdm
# Root directories for the raw recordings and for the exported frames.
# Both are relative paths, so they resolve against the working directory.
raw_data_path = '../raw_data'
data_path = '../data'

# Directory names substituted into the glob pattern below raw_data_path.
dates = [
    'date1',
    'date2',
    'date3',
]
objectives = [
    'subject1',
    'subject2',
]

# Seed NumPy's global random number generator at import time.
np.random.seed(42)
def vid2frames(vid, namemeta, outdir):
    """Write the frames stored in a MATLAB file to disk as PNG images.

    The ``.mat`` file at ``vid`` is loaded with ``scipy.io.loadmat``. For every
    variable whose name contains ``'Video_fps'``, slice ``i`` along the third
    axis is written to ``<outdir>/<namemeta>_<variable>_<i>.png``. All other
    entries of the loaded dictionary are ignored. Images that already exist
    on disk are not rewritten.

    Args:
        vid: Path of the ``.mat`` file to read.
        namemeta: Prefix placed at the start of every image file name.
        outdir: Destination directory; created when it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outdir, exist_ok=True)

    mat = scipy.io.loadmat(vid)
    for key, frames in mat.items():
        if 'Video_fps' not in key:
            continue
        # assumes each matching variable is a 3-D array with the frame index
        # on the last axis -- TODO confirm against the recording format
        for i in range(frames.shape[2]):
            pngname = '{0}/{1}_{2}_{3}.png'.format(outdir, namemeta, key, i)
            if not os.path.exists(pngname):
                cv2.imwrite(pngname, frames[:, :, i])
# # vid2frames
# def run_vid2frames(raw_data_path, data_path, dates):
# for subdir, dirs, files in os.walk(raw_data_path):
# for file in tqdm(files):
# filepath = subdir + os.sep + file
# filepath_list = filepath.split(sep=os.sep)
# label = '{0}_{1}'.format(filepath_list[2], filepath_list[3])
# if filepath_list[1] == dates[1]:
# if filepath.endswith(".mat"):
# # print (filepath, label, '{0}/train'.format(data_path))
# # break
# vid2frames(filepath, label, '{0}/train'.format(data_path))
# elif filepath_list[1] == dates[0]:
# vid2frames(filepath, label, '{0}/val'.format(data_path))
# Split the videos into training and validation sets.
def prep_train_test(dates, raw_data_path, subjects=None):
    """Collect the ``.mat`` videos and split them into train / validation.

    Videos are discovered with the glob pattern
    ``<raw_data_path>/<date>/<subject>/*/*.mat`` and divided into two groups
    depending on whether ``'96'`` occurs in their path below
    ``raw_data_path``. Within each group the first two and the last two videos
    are held out for validation and the remaining ones are used for training,
    so no video ever appears in both lists.

    Args:
        dates: Iterable of date directory names to scan.
        raw_data_path: Root directory of the raw recordings.
        subjects: Iterable of subject directory names to scan. Defaults to
            the module-level ``objectives`` list.

    Returns:
        A ``(train_vids, val_vids)`` tuple of lists of file paths. In both
        lists the ``'96'`` group comes first, followed by the other group.
    """
    if subjects is None:
        subjects = objectives

    # NOTE(review): '96' presumably marks the low oxygen-saturation
    # recordings -- confirm against the naming scheme of the raw data.
    low_vids = []
    normal_vids = []
    for d in dates:
        for subject in subjects:
            pattern = '{0}/{1}/{2}/*/*.mat'.format(raw_data_path, d, subject)
            # glob returns matches in arbitrary order; sort so that the
            # train / validation split is reproducible between runs.
            for video_path in sorted(glob.glob(pattern)):
                # Only inspect the part below the root so that a '96' in
                # raw_data_path itself cannot misclassify every video.
                if '96' in os.path.relpath(video_path, raw_data_path):
                    low_vids.append(video_path)
                else:
                    normal_vids.append(video_path)

    low_train, low_val = _split_edges(low_vids)
    normal_train, normal_val = _split_edges(normal_vids)
    return low_train + normal_train, low_val + normal_val


def _split_edges(vids):
    """Return ``(middle, edges)`` where edges are the first and last two items."""
    head, rest = vids[:2], vids[2:]
    # The tail is taken from what remains after the head, so lists with fewer
    # than four items never contribute the same video twice.
    return rest[:-2], head + rest[-2:]
def prep_data(train_vids, val_vids):
    """Export the frames of the training and validation videos.

    A progress banner is printed for each split, then every path ending in
    ``.mat`` is passed to ``vid2frames``. Training frames are written to
    ``<data_path>/train`` and validation frames to ``<data_path>/val``, where
    ``data_path`` is the module-level output root.

    Args:
        train_vids: Paths of the videos that belong to the training split.
        val_vids: Paths of the videos that belong to the validation split.
    """
    jobs = (
        ('prep train data...', train_vids, '{0}/train'),
        ('prep validation data...', val_vids, '{0}/val'),
    )
    for banner, videos, outdir_template in jobs:
        print(banner)
        for video in tqdm(videos):
            # The label is the second component of the path as given.
            # NOTE(review): with a root like '../raw_data' this component is
            # the root directory name itself -- confirm this is intended.
            label = video.split(sep=os.sep)[1]
            if not video.endswith(".mat"):
                continue
            vid2frames(video, label, outdir_template.format(data_path))
def explore_data(data_path):
    """Index the files below ``data_path`` by the fields of their names.

    Every file name found by walking ``data_path`` is split on ``'_'``. The
    fields are mapped onto columns as follows:

    * ``ImageName``: the complete file name.
    * ``Label``: field 0.
    * ``OxygenSaturation``: fields 1 and 2 joined by ``'_'``.
    * ``VideoName``: fields 3, 4 and 5 joined by ``'_'``. Field 5 keeps the
      file extension when the name has exactly six fields.

    Fields beyond the sixth are ignored. Files whose names have fewer than
    six fields are skipped instead of aborting the scan with an
    ``IndexError``.

    Args:
        data_path: Root directory to walk recursively.

    Returns:
        A ``pandas.DataFrame`` with one row per recognised file and the
        columns ``ImageName``, ``Label``, ``OxygenSaturation`` and
        ``VideoName``. It is empty when no file matches.
    """
    rows = []
    for _subdir, _dirs, files in tqdm(os.walk(data_path)):
        for name in files:
            parts = name.split(sep='_')
            if len(parts) < 6:
                # Stray files (e.g. OS metadata) do not follow the naming
                # scheme and cannot be mapped onto the columns.
                continue
            rows.append([
                name,
                parts[0],
                '{0}_{1}'.format(parts[1], parts[2]),
                '{0}_{1}_{2}'.format(parts[3], parts[4], parts[5]),
            ])

    return pd.DataFrame(
        rows,
        columns=['ImageName', 'Label', 'OxygenSaturation', 'VideoName'],
    )