Skip to content

Commit ac881dd

Browse files
authored
Merge pull request #1522 from zm711/biocam-testing
BiocamRawIO: Prevent loading whole array into memory
2 parents 1b52cdf + f74136a commit ac881dd

File tree

1 file changed

+56
-17
lines changed

1 file changed

+56
-17
lines changed

neo/rawio/biocamrawio.py

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
_spike_channel_dtype,
1818
_event_channel_dtype,
1919
)
20+
from neo.core import NeoReadWriteError
2021

2122

2223
class BiocamRawIO(BaseRawIO):
@@ -122,15 +123,45 @@ def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, strea
122123
i_start = 0
123124
if i_stop is None:
124125
i_stop = self._num_frames
125-
if channel_indexes is None:
126-
channel_indexes = slice(None)
126+
127+
# read functions are different based on the version of biocam
127128
data = self._read_function(self._filehandle, i_start, i_stop, self._num_channels)
128-
return data[:, channel_indexes]
129+
130+
# older style data returns array of (n_samples, n_channels), should be a view
131+
# but if memory issues come up we should double-check how the file is being stored
132+
if data.ndim > 1:
133+
if channel_indexes is None:
134+
channel_indexes = slice(None)
135+
sig_chunk = data[:, channel_indexes]
136+
137+
# newer style data returns an initial flat array (n_samples * n_channels)
138+
# we iterate through channels rather than slicing
139+
else:
140+
if channel_indexes is None:
141+
channel_indexes = [ch for ch in range(self._num_channels)]
142+
143+
sig_chunk = np.zeros((i_stop-i_start, len(channel_indexes)))
144+
# iterate through channels to prevent loading all channels into memory which can cause
145+
# memory exhaustion. See https://github.com/SpikeInterface/spikeinterface/issues/3303
146+
for index, channel_index in enumerate(channel_indexes):
147+
sig_chunk[:, index] = data[channel_index::self._num_channels]
148+
149+
return sig_chunk
129150

130151

131-
def open_biocam_file_header(filename):
152+
def open_biocam_file_header(filename)-> dict:
132153
"""Open a Biocam hdf5 file, read and return the recording info, pick the correct method to access raw data,
133-
and return this to the caller."""
154+
and return this to the caller.
155+
156+
Parameters
157+
----------
158+
filename: str
159+
The file to be parsed
160+
161+
Returns
162+
-------
163+
dict
164+
The information necessary to read a biocam file (gain, n_samples, n_channels, etc)."""
134165
import h5py
135166

136167
rf = h5py.File(filename, "r")
@@ -154,9 +185,9 @@ def open_biocam_file_header(filename):
154185
elif file_format in (101, 102) or file_format is None:
155186
num_channels = int(rf["3BData/Raw"].shape[0] / num_frames)
156187
else:
157-
raise Exception("Unknown data file format.")
188+
raise NeoReadWriteError("Unknown data file format.")
158189

159-
# # get channels
190+
# get channels
160191
channels = rf["3BRecInfo/3BMeaStreams/Raw/Chs"][:]
161192

162193
# determine correct function to read data
@@ -166,14 +197,14 @@ def open_biocam_file_header(filename):
166197
elif signal_inv == -1:
167198
read_function = readHDF5t_100_i
168199
else:
169-
raise Exception("Unknown signal inversion")
200+
raise NeoReadWriteError("Unknown signal inversion")
170201
else:
171202
if signal_inv == 1:
172203
read_function = readHDF5t_101
173204
elif signal_inv == -1:
174205
read_function = readHDF5t_101_i
175206
else:
176-
raise Exception("Unknown signal inversion")
207+
raise NeoReadWriteError("Unknown signal inversion")
177208

178209
gain = (max_uv - min_uv) / (2**bit_depth)
179210
offset = min_uv
@@ -200,19 +231,22 @@ def open_biocam_file_header(filename):
200231
scale_factor = experiment_settings["ValueConverter"]["ScaleFactor"]
201232
sampling_rate = experiment_settings["TimeConverter"]["FrameRate"]
202233

234+
num_channels = None
203235
for key in rf:
204236
if key[:5] == "Well_":
205237
num_channels = len(rf[key]["StoredChIdxs"])
206238
if len(rf[key]["Raw"]) % num_channels:
207-
raise RuntimeError(f"Length of raw data array is not multiple of channel number in {key}")
239+
raise NeoReadWriteError(f"Length of raw data array is not multiple of channel number in {key}")
208240
num_frames = len(rf[key]["Raw"]) // num_channels
209241
break
210-
try:
242+
243+
if num_channels is not None:
211244
num_channels_x = num_channels_y = int(np.sqrt(num_channels))
212-
except NameError:
213-
raise RuntimeError("No Well found in the file")
245+
else:
246+
raise NeoReadWriteError("No Well found in the file")
247+
214248
if num_channels_x * num_channels_y != num_channels:
215-
raise RuntimeError(f"Cannot determine structure of the MEA plate with {num_channels} channels")
249+
raise NeoReadWriteError(f"Cannot determine structure of the MEA plate with {num_channels} channels")
216250
channels = 1 + np.concatenate(np.transpose(np.meshgrid(range(num_channels_x), range(num_channels_y))))
217251

218252
gain = scale_factor * (max_uv - min_uv) / (max_digital - min_digital)
@@ -231,6 +265,10 @@ def open_biocam_file_header(filename):
231265
)
232266

233267

268+
######################################################################
269+
# Helper functions to obtain the raw data split by Biocam version.
270+
271+
# return the full array for the old datasets
234272
def readHDF5t_100(rf, t0, t1, nch):
235273
return rf["3BData/Raw"][t0:t1]
236274

@@ -239,15 +277,16 @@ def readHDF5t_100_i(rf, t0, t1, nch):
239277
return 4096 - rf["3BData/Raw"][t0:t1]
240278

241279

280+
# return flat array that we will iterate through
242281
def readHDF5t_101(rf, t0, t1, nch):
243-
return rf["3BData/Raw"][nch * t0 : nch * t1].reshape((t1 - t0, nch), order="C")
282+
return rf["3BData/Raw"][nch * t0 : nch * t1]
244283

245284

246285
def readHDF5t_101_i(rf, t0, t1, nch):
247-
return 4096 - rf["3BData/Raw"][nch * t0 : nch * t1].reshape((t1 - t0, nch), order="C")
286+
return 4096 - rf["3BData/Raw"][nch * t0 : nch * t1]
248287

249288

250289
def readHDF5t_brw4(rf, t0, t1, nch):
251290
for key in rf:
252291
if key[:5] == "Well_":
253-
return rf[key]["Raw"][nch * t0 : nch * t1].reshape((t1 - t0, nch), order="C")
292+
return rf[key]["Raw"][nch * t0 : nch * t1]

0 commit comments

Comments
 (0)