visual-neural-decode/linear_decoder_parallelize.py at master · nikparth/visual-neural-decode · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
import os
import numpy as np
import h5py
from time import time

from dataset import ds_x_activities_y_original

## Import from constants file
# Root data folder; per-split paths are built as data_dir + data_type + '/activities_<suffix>/', '/images/' and '/decodedimages_.../'
data_dir = 'FakeData/'
cell_type_inds_path = 'temp/' # optional, only if you ever want to split cell types (folder that holds cell_type_inds.npy)
# Root folder under which each 'linear_decoder_<suffix>' directory (with its model/ and logs/ subfolders) is created
linear_decoder_save_path = 'FakeModels/'
immean = 0.5 # passed to the data generator as image_mean
total_block_size = 10000 # passed to the data generator as batch_size

class LinearDecoderParallelize():
	'''
	Linear decoder fitted from per-block XTX/XTY statistics.

	Workflow:
		ComputeStatistics: saves X^T X and X^T Y for every requested block (separate jobs can handle separate blocks)
		CreateFullW: sums the saved per-block statistics and computes the decoder weights W
		CreateDecodedImages: applies the saved weights W to the activities of the requested blocks

	All three are static methods, so they can be called on the class or on an instance.
	'''

	def __init__(self):
		self.data = [] # not used by any method of this class

	# ----------------
	# PRIVATE HELPERS
	# ----------------

	@staticmethod
	def _cell_type_string(cell_type_list):
		'''Concatenate the cell type indices into a string for file names, e.g. np.asarray([0,1]) -> '01' (strips spaces, dots and brackets from str()).'''
		return str(cell_type_list).translate(str.maketrans('', '', ' .[]'))

	@staticmethod
	def _cell_type_tag(cell_type_list):
		'''Return the '_CellType_<types>' file name tag, or '' when all cells are used (cell_type_list is None).'''
		if cell_type_list is None:
			return ''
		return '_CellType_'+LinearDecoderParallelize._cell_type_string(cell_type_list)

	@staticmethod
	def _decoder_directory(activity_suffix, cell_type_list):
		'''Return the directory that holds the model/ and logs/ folders for this activity suffix and cell type selection.'''
		directory_name = linear_decoder_save_path+'/linear_decoder_'+activity_suffix
		if cell_type_list is not None:
			directory_name += '_'+LinearDecoderParallelize._cell_type_string(cell_type_list)
		return directory_name

	@staticmethod
	def _stats_paths(directory_name, i_block, activity_suffix, cell_type_list):
		'''Return the (XTX, XTY) .h5 file paths of one block.'''
		stem = '_B_'+str(i_block)+'_W'+activity_suffix+LinearDecoderParallelize._cell_type_tag(cell_type_list)+'.h5'
		return directory_name+'/model/XTX'+stem, directory_name+'/model/XTY'+stem

	@staticmethod
	def _weights_path(directory_name, activity_suffix, cell_type_list):
		'''Return the .h5 file path of the linear decoder weights.'''
		return directory_name+'/model/lineardecoder_W_'+activity_suffix+LinearDecoderParallelize._cell_type_tag(cell_type_list)+'.h5'

	@staticmethod
	def _cell_type_mask(cell_type_list):
		'''Load cell_type_inds.npy and return a boolean mask of the cells whose type is in cell_type_list.'''
		cell_type_inds = np.load(cell_type_inds_path+'/cell_type_inds.npy')
		return np.in1d(cell_type_inds, cell_type_list)

	@staticmethod
	def _make_generator(activity_suffix, block_vec, im_width, data_type):
		'''Create the unshuffled data generator over the blocks in block_vec.'''
		activities_dir_path = data_dir+data_type+'/activities_'+activity_suffix+'/'
		original_dir_path = data_dir+data_type+'/images/'
		return ds_x_activities_y_original(batch_size=total_block_size, im_width=im_width, activities_dir_path=activities_dir_path, original_dir_path=original_dir_path, image_mean=immean, normalize_scale=True, bucket_range=np.asarray(block_vec).tolist(), shuffle=False)

	@staticmethod
	def _next_block(data_generator, i_block, which_n):
		'''
		Pull the next (activities, images) pair from the generator and prepare the activities.
		Exits (SystemExit, status 1) if the generator did not produce block i_block.
		'''
		X, Y, block_num = next(data_generator)

		# Sanity check that generator is creating correct block
		if i_block != block_num:
			print('ERROR: INACCURATE DATA GENERATION')
			raise SystemExit(1) # equivalent of sys.exit(1) without needing the sys module

		if which_n is not None:
			X = X[:,which_n] # keep only the selected cell types
		X = np.insert(X, 0, 1.0, axis=1) # insert column of bias terms
		return X, Y

	@staticmethod
	def _save_h5(path, array):
		'''Write array to the 'data' dataset of a new .h5 file (file is closed even if writing fails).'''
		with h5py.File(path, 'w') as h5_file:
			h5_file.create_dataset('data', data=array)

	@staticmethod
	def _load_h5(path):
		'''Read the 'data' dataset of an .h5 file as a float32 array.'''
		with h5py.File(path, 'r') as h5_file:
			return h5_file['data'][:].astype('float32')

	@staticmethod
	def _log(log_file_name, message):
		'''Append message to the log file.'''
		with open(log_file_name, "a") as log_file:
			log_file.write(message)

	# ---------------
	# PUBLIC METHODS
	# ---------------

	@staticmethod
	def ComputeStatistics(activity_suffix, block_vec, im_width=128, cell_type_list=None, data_type='training'):
		'''
		Inputs:
			activity_suffix: string that follows activities you want, for example 'spatialSim' for activities in folder 'activities_spatialSim'
			block_vec: numpy array of block indices for which you want to compute statistics
			im_width: the size of one side of the images (code only handles square images for now but this could be easily modified)
			cell_type_list: numpy array of index of specific cell types you want decoded (corresponding to cell_type_inds.npy), for example np.asarray([0,1]) if you want to grab all cell types identified as 0 or 1
			data_type: the type of images/activities to compute the linear decoder using, should probably always be training
		Outputs:
			Computes and saves the XTX/XTY statistics for every block in block_vec
			Exits (SystemExit) if the data generator returns a block other than the expected one
		'''
		decoder = LinearDecoderParallelize

		# -------------------------------------
		# CREATE MODEL SAVE DIRECTORY/LOG FILE
		# -------------------------------------

		directory_name = decoder._decoder_directory(activity_suffix, cell_type_list)
		# exist_ok so that model/ and logs/ are created even when the parent directory already exists
		os.makedirs(directory_name+'/model', exist_ok=True)
		os.makedirs(directory_name+'/logs', exist_ok=True)

		# --------------------
		# LOAD DATA GENERATOR
		# --------------------

		data_generator = decoder._make_generator(activity_suffix, block_vec, im_width, data_type)

		# -----------------
		# PARSE CELL TYPES
		# -----------------

		which_n = None # None means use all cells
		if cell_type_list is not None: # use specific cell types instead of all the data
			which_n = decoder._cell_type_mask(cell_type_list)

		# ---------------------------------------
		# CALCULATE SAMPLE XTX AND XTY STATISTICS
		# ---------------------------------------

		start_time = time()

		for i_block in block_vec:

			log_file_name = directory_name + '/logs/XTX_XTY_Block_'+str(i_block)+'.txt'
			decoder._log(log_file_name, 'Block number ' + str(i_block) + '\n')

			# Generate activities (X, with bias column)/ images (Y, one flattened image per row)
			X, Y = decoder._next_block(data_generator, i_block, which_n)
			Y = Y.reshape((-1,im_width*im_width))

			# Compute XTX/XTY
			XTX = X.T.dot(X)
			XTY = X.T.dot(Y)

			# Save data
			model_path_XTX, model_path_XTY = decoder._stats_paths(directory_name, i_block, activity_suffix, cell_type_list)
			decoder._save_h5(model_path_XTX, XTX)
			decoder._save_h5(model_path_XTY, XTY)

			end_time = time()

			# Log time (minutes) spent on this block
			decoder._log(log_file_name, 'Block time ' + str((end_time-start_time)/60.) + ' \n')

			start_time = time()

		return

	@staticmethod
	def CreateFullW(activity_suffix, block_vec, im_width=128, cell_type_list=None, data_type='training'):
		'''
		Inputs:
			activity_suffix: string that follows activities you want, for example 'spatialSim' for activities in folder 'activities_spatialSim'
			block_vec: numpy array of block indices for which you want to add statistics, should usually be all of training data blocks
			im_width: not used by this method, kept so the three methods share the same signature
			cell_type_list: numpy array of index of specific cell types you want decoded (corresponding to cell_type_inds.npy), for example np.asarray([0,1]) if you want to grab all cell types identified as 0 or 1
			data_type: not used by this method, kept so the three methods share the same signature
		Outputs:
			Computes and saves linear decoder weights
			Raises ValueError if block_vec is empty (there would be no statistics to sum)
		'''
		decoder = LinearDecoderParallelize

		if np.size(block_vec) == 0:
			raise ValueError('block_vec must contain at least one block index')

		start = time()

		directory_name = decoder._decoder_directory(activity_suffix, cell_type_list)
		os.makedirs(directory_name+'/logs', exist_ok=True)
		log_file_name = directory_name + '/logs/CreateFullW.txt'

		# Log block indices
		decoder._log(log_file_name, 'blocks: {} \n'.format(block_vec))

		# ------------------------------------
		# LOOP OVER BLOCKS AND ADD STATISTICS
		# ------------------------------------

		XTX = None
		XTY = None
		for i_block in block_vec:

			# Load block specific XTX/XTY
			model_path_XTX, model_path_XTY = decoder._stats_paths(directory_name, i_block, activity_suffix, cell_type_list)
			thisXTX = decoder._load_h5(model_path_XTX)
			thisXTY = decoder._load_h5(model_path_XTY)

			# Add this blocks statistics to overall ones
			if XTX is None:
				XTX = thisXTX
				XTY = thisXTY
			else:
				XTX += thisXTX
				XTY += thisXTY

			decoder._log(log_file_name, 'Block ' + str(i_block) + ' \n')

		decoder._log(log_file_name, 'Computing and saving model... \n')

		# -------------------------------
		# COMPUTE LINEAR DECODER WEIGHTS
		# -------------------------------

		# Normal equations: W = (X^T X)^-1 X^T Y
		XTX_inv = np.linalg.inv(XTX)
		W = np.dot(XTX_inv, XTY)

		# ----------------------------
		# SAVE LINEAR DECODER WEIGHTS
		# ----------------------------

		decoder._save_h5(decoder._weights_path(directory_name, activity_suffix, cell_type_list), W)

		total_time = (time() - start)/60.
		decoder._log(log_file_name, 'total time: {} \n'.format(total_time))

		return

	@staticmethod
	def CreateDecodedImages(activity_suffix, LD_directory_suffix, block_vec, im_width=128, cell_type_list=None, data_type='training'):
		'''
		Inputs:
			activity_suffix: string that follows activities you want, for example 'spatialSim' for activities in folder 'activities_spatialSim'
			LD_directory_suffix: the suffix for the folder in which to save linear decoded images, aka 'LD_images'
			block_vec: numpy array of block indices for which you want to create decoded images
			im_width: the size of one side of the images (code only handles square images for now but this could be easily modified)
			cell_type_list: numpy array of index of specific cell types you want decoded (corresponding to cell_type_inds.npy), for example np.asarray([0,1]) if you want to grab all cell types identified as 0 or 1
			data_type: the type of data for which to compute linear decoded images
		Outputs:
			Computes and saves linear decoder images for blocks in block_vec
			Exits (SystemExit) if the data generator returns a block other than the expected one
		'''
		decoder = LinearDecoderParallelize

		directory_name = decoder._decoder_directory(activity_suffix, cell_type_list)
		os.makedirs(directory_name+'/logs', exist_ok=True)
		log_file_name = directory_name + '/logs/CreateDecodedImages_'+data_type+'.txt'

		# --------------------------------
		# MAKE DIRECTORY IF DOESN'T EXIST
		# --------------------------------

		if cell_type_list is not None: # if using specific cell types
			cell_type_string = decoder._cell_type_string(cell_type_list)
			decoded_images_directory_name = data_dir+data_type+'/decodedimages_cellType_'+cell_type_string+'_'+LD_directory_suffix+'/'
		else:
			decoded_images_directory_name = data_dir+data_type+'/decodedimages_'+LD_directory_suffix+'/'

		os.makedirs(decoded_images_directory_name, exist_ok=True)

		# ----------------------------
		# LOAD LINEAR DECODER WEIGHTS
		# ----------------------------

		W = decoder._load_h5(decoder._weights_path(directory_name, activity_suffix, cell_type_list))

		# --------------------
		# LOAD DATA GENERATOR
		# --------------------

		data_generator = decoder._make_generator(activity_suffix, block_vec, im_width, data_type)

		# -----------------
		# PARSE CELL TYPES
		# -----------------

		which_n = None # None means use all cells
		if cell_type_list is not None: # same cell selection that was used to fit the weights
			which_n = decoder._cell_type_mask(cell_type_list)

		# --------------------------------------------
		# LOOP THROUGH BLOCKS AND SAVE DECODED IMAGES
		# --------------------------------------------

		for i_block in block_vec:

			# Generate activities (original images are not needed here)
			X, _ = decoder._next_block(data_generator, i_block, which_n)

			# Compute decoded images
			decoded_images = np.dot(X,W)

			# Save decoded images
			save_path = decoded_images_directory_name+str(i_block)+'_'+LD_directory_suffix+decoder._cell_type_tag(cell_type_list)+'.h5'
			decoder._save_h5(save_path, decoded_images)

			decoder._log(log_file_name, 'Block ' + str(i_block) + ' '+data_type+' decoded images saved \n')