From 1b25878ea0d0a9aaa5af7112fc591a9372c53a76 Mon Sep 17 00:00:00 2001
From: sprenger
Date: Wed, 26 Jan 2022 15:10:51 +0100
Subject: [PATCH 01/13] add first version of BEP Templater and accompanying
 test files and utilities

---
 BEP032Templater.py               | 376 +++++++++++++++++++++++++++++++
 diglab2ando.py => diglab2bids.py |  61 ++---
 tests/test_BEP032Templater.py    | 175 ++++++++++++++
 tests/test_files/record.csv      |   3 +
 tests/utils.py                   |  31 +++
 5 files changed, 598 insertions(+), 48 deletions(-)
 create mode 100644 BEP032Templater.py
 rename diglab2ando.py => diglab2bids.py (81%)
 create mode 100644 tests/test_BEP032Templater.py
 create mode 100644 tests/test_files/record.csv
 create mode 100644 tests/utils.py

diff --git a/BEP032Templater.py b/BEP032Templater.py
new file mode 100644
index 0000000..03d101c
--- /dev/null
+++ b/BEP032Templater.py
@@ -0,0 +1,376 @@
+import shutil
+import argparse
+import re
+
+import bep032tools.validator.BEP032Validator
+
+try:
+    import pandas as pd
+
+    HAVE_PANDAS = True
+except ImportError:
+    HAVE_PANDAS = False
+from bep032tools.validator.BEP032Validator import build_rule_regexp
+from bep032tools.rulesStructured import RULES_SET
+from bep032tools.generator.utils import *
+from bep032tools.generator.BEP032Generator import BEP032Data
+
+METADATA_LEVELS = {i: r['authorized_metadata_files'] for i, r in enumerate(RULES_SET)}
+METADATA_LEVEL_BY_NAME = {build_rule_regexp(v)[0]: k for k, values in METADATA_LEVELS.items() for v
+                          in values}
+
+# TODO: These can be extracted from the BEP032Data init definition. Check out the
+#  function inspection options
+ESSENTIAL_CSV_COLUMNS = ['sub_id', 'ses_id']
+OPTIONAL_CSV_COLUMNS = ['tasks', 'runs']
+
+
+class BEP032TemplateData(BEP032Data):
+    """
+    Representation of BEP032 data, as specified in the
+    [ephys BEP](https://bids.neuroimaging.io/bep032)
+
+    The BEP032Data object can track multiple realizations of `split`, `run`, `task` but only a
+    single realization of `session` and `subject`, i.e. to represent multiple `session` folders,
+    multiple BEP032Data objects are required. To include multiple realizations of tasks
+    or runs, call the `register_data` method for each set of parameters separately.
+
+    Parameters
+    ----------
+    sub_id : str
+        subject identifier, e.g. '0012' or 'j.s.smith'
+    ses_id : str
+        session identifier, e.g. '20210101' or '007'
+    tasks : str
+        task identifier of data files
+    runs : str
+        run identifier of data files
+    """
+
+    def __init__(self, sub_id, ses_id, diglab_df=None, project_name=None):
+        super().__init__(sub_id, ses_id, modality='ephys')
+        self.diglab_df = diglab_df
+        self.project_name = project_name
+
+    def generate_metadata_file_participants(self, output):
+        assert self.sub_id == self.diglab_df['guid']
+        participant_df = pd.DataFrame([['sub-' + self.sub_id]],
+                                      columns=['participant_id'])
+        if not output.with_suffix('.tsv').exists():
+            save_tsv(participant_df, output)
+
+    def generate_metadata_file_tasks(self, output):
+        # here we want to call save_json and save_tsv()
+        pass
+
+    def generate_metadata_file_dataset_description(self, output):
+        dataset_dict = {
+            "Name": self.project_name,
+            "BIDSVersion": "1.6.0",
+            "License": "CC BY 4.0",
+            "Authors": [self.diglab_df['user']],
+            "Acknowledgements": "TBA",
+            "HowToAcknowledge": "TBA",
+            "Funding": ["TBA"],
+            "ReferencesAndLinks": "TBA",
+            "EthicsApprovals": [self.diglab_df['ethical_protocol_id']]
+        }
+        save_json(dataset_dict, output)
+
+    def generate_metadata_file_sessions(self, output):
+        session_df = pd.DataFrame([
+            ['ses-' + self.ses_id, '2009-06-15T13:45:30', '120']],
+            columns=['session_id', 'acq_time', 'systolic_blood_pressure'])
+        if not output.with_suffix('.tsv').exists():
+            save_tsv(session_df, output)
+
+    def generate_metadata_file_probes(self, output):
+        probes_df = pd.DataFrame([
+            ['e380a', 'multi-shank', 0, 'iridium-oxide', 0, 0, 0, 'circle', 20],
+            ['e380b', 'multi-shank', 1.5, 'iridium-oxide', 0, 100, 0, 'circle', 20],
+            ['t420a', 'tetrode', 3.6, 'iridium-oxide', 0, 200, 0, 'circle', 20],
+            ['t420b', 'tetrode', 7, 'iridium-oxide', 500, 0, 0, 'circle', 20]],
+            columns=['probe_id', 'type', 'coordinate_space', 'material', 'x', 'y', 'z', 'shape',
+                     'contact_size'])
+        save_tsv(probes_df, output)
+
+    def generate_metadata_file_channels(self, output):
+        channels_df = pd.DataFrame([
+            # [129, 1, 'neuronal', 'mV', 30000, 30, 'good'],
+            # [130, 3, 'neuronal', 'mV', 30000, 30, 'good'],
+            # [131, 5, 'neuronal', 'mV', 30000, 30, 'bad'],
+            # [132, 'n/a', 'sync_pulse', 'V', 1000, 1, 'n/a']
+            ],
+            columns=['channel_id', 'contact_id', 'type', 'units', 'sampling_frequency', 'gain',
+                     'status'])
+        save_tsv(channels_df, output)
+
+    def generate_metadata_file_contacts(self, output):
+        contact_df = pd.DataFrame([
+            # [1, 'e380a', 0, 1.1, 'iridium-oxide', 0, 0, 0, 'circle', 20],
+            # [2, 'e380a', 0, 1.5, 'iridium-oxide', 0, 100, 0, 'circle', 20],
+            # [3, 'e380a', 0, 3.6, 'iridium-oxide', 0, 200, 0, 'circle', 20],
+            # [4, 'e380a', 1, 7, 'iridium-oxide', 500, 0, 0, 'circle', 20],
+            # [5, 'e380a', 1, 7, 'iridium-oxide', 500, 100, 0, 'circle', 20],
+            # [6, 'e380a', 1, 7, 'iridium-oxide', 500, 200, 0, 'circle', 20]
+            ],
+            columns=['contact_id', 'probe_id', 'shank_id', 'impedance', 'material', 'x', 'y', 'z',
+                     'shape', 'contact_size'])
+        save_tsv(contact_df, output)
+
+    def generate_metadata_file_ephys(self, output):
+        # extract selected modalities
+        modes = self.diglab_df.filter(regex='modality___\w', axis=1)
+        modalities = modes.columns[modes==1].str.replace('modality___', '')
+
+        trialbeh = self.diglab_df.filter(regex='subject_behaviour___\w', axis=1)
+        trialbeh = trialbeh.columns[trialbeh==1].str.replace('subject_behaviour___', '')
+
+        posttrialbeh = self.diglab_df.filter(regex='subject_behaviour_2___\w', axis=1)
+        posttrialbeh = trialbeh.columns[posttrialbeh==1].str.replace('subject_behaviour_2___', '')
+
+        rewardfluidtype = self.diglab_df.filter(regex='reward_fluid_type___\w', axis=1)
+        rewardfluidtype = trialbeh.columns[rewardfluidtype==1].str.replace('reward_fluid_type___', '')
+
+        if self.diglab_df['reward_fluid_type_other']:
+            rewardfluidtype += [self.diglab_df['reward_fluid_type_other']]
+
+        rewardothertype = self.diglab_df.filter(regex='reward_other___\w', axis=1)
+        rewardothertype = trialbeh.columns[rewardothertype==1].str.replace('reward_other___', '')
+
+
+        ephys_dict = {
+            # "PowerLineFrequency": 50,
+            # "PowerLineFrequencyUnit": "Hz",
+            # "Manufacturer": "OpenEphys",
+            # "ManufacturerModelName": "OpenEphys Starter Kit",
+            # "ManufacturerModelVersion": "",
+            # "SamplingFrequency": 30000,
+            # "SamplingFrequencyUnit": "Hz",
+            # "Location": "Institut de Neurosciences de la Timone, Faculté de Médecine, 27, "
+            #             "Boulevard Jean Moulin, 13005 Marseille - France",
+            # "Software": "Cerebus",
+            # "SoftwareVersion": "1.5.1",
+            "Creator": self.diglab_df['user'],
+            # "Maintainer": "John Doe jr.",
+            # "Procedure": {
+            #     "Pharmaceuticals": {
+            #         "isoflurane": {
+            #             "PharmaceuticalName": "isoflurane",
+            #             "PharmaceuticalDoseAmount": 50,
+            #             "PharmaceuticalDoseUnit": "ug/kg/min",
+            #         },
+            #         "ketamine": {
+            #             "PharmaceuticalName": "ketamine",
+            #             "PharmaceuticalDoseAmount": 0.1,
+            #             "PharmaceuticalDoseUnit": "ug/kg/min",
+            #         },
+            #     },
+            # },
+            "Comments": self.diglab_df['comments_exp'],
+            "SessionNumber": self.diglab_df['ses_number'],
+            "Subject": {
+                "Weight": self.diglab_df['weight'],
+                "WeightUnit": 'kg',
+                "Comments": self.diglab_df['comments_subject'],
+                "SubjectBehaviour": trialbeh,
+                "PostTrialSubjectBehaviour": posttrialbeh,
+            },
+            "SpecialEvents": {},
+            "Modalities": modalities,
+            "Setup": {
+                "Comments": self.diglab_df['comments_setup']
+            },
+            "Rewards": {
+                "FluidType": rewardfluidtype,
+                "OtherType": rewardothertype,
+            },
+            "DigLab": {
+                "record_id": self.diglab_df['record_id'],
+                "diglab_version": self.diglab_df['diglab_version'],
+                "redcap_form_version": self.diglab_df['redcap_form_version'],
+            }
+        }
+
+        for id in range(3):
+            if self.diglab_df[f'special_event_{id}']:
+                ephys_dict["SpecialEvents"][id] = {
+                    "Comment": self.diglab_df[f'special_event_{id}'],
+                    "Time": self.diglab_df[f'special_event_time_{id}']
+                }
+
+        save_json(ephys_dict, output)
+
+    def generate_metadata_file_scans(self, output):
+        # extract data quality value
+        qualities = self.diglab_df.filter(regex='data_quality___\w', axis=1)
+        quality = qualities.columns[qualities==1].str.replace('quality___', '')
+
+        for key in self.data.keys():
+            filename = f'ephys/sub-{self.sub_id}_ses-{self.ses_id}'
+            if key:
+                filename += key
+            filename += '.nix'
+            runs_df = pd.DataFrame([
+                [filename, self.diglab_df['date']], self.diglab_df['exp_name'],
+                self.diglab_df['stimulation___yes'],
+                self.diglab_df['subject_behaviour_multi___yes'], self.diglab_df['time_last_trial'],
+                quality, self.diglab_df['incomplete_session___yes'],
+                self.diglab_df['reward_fluid'], 'ml', self.diglab_df['reward_fluid_additional']
+            ],
+                columns=['filename', 'acq_date', 'exp_name', 'stimulation', 'post_trial_data',
+                         'time_last_trial', 'data_quality', 'incomplete_session', 'fluid_reward',
+                         'fluid_reward_unit', 'additional_fluid_reward'])
+            save_tsv(runs_df, output)
+
+    def validate(self):
+        """
+        Validate the generated structure using the BEP032 validator
+
+        Returns
+        ----------
+        bool
+            True if validation was successful. False if it failed.
+        """
+        bep032tools.validator.BEP032Validator.is_valid(self.basedir)
+
+
+def create_file(source, destination, mode):
+    """
+    Create a file at a destination location
+
+    Parameters
+    ----------
+    source: str
+        Source location of the file.
+    destination: str
+        Destination location of the file.
+    mode: str
+        File creation mode. Valid parameters are 'copy', 'link' and 'move'.
+
+    Raises
+    ----------
+    ValueError
+        In case of invalid creation mode.
+    """
+    if mode == 'copy':
+        shutil.copy(source, destination)
+    elif mode == 'link':
+        os.link(source, destination)
+    elif mode == 'move':
+        shutil.move(source, destination)
+    else:
+        raise ValueError(f'Invalid file creation mode "{mode}"')
+
+
+def extract_structure_from_csv(csv_file):
+    """
+    Load csv file that contains folder structure information and return it as pandas.DataFrame.
+
+    Parameters
+    ----------
+    csv_file: str
+        The file to be loaded.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A dataframe containing the essential columns for creating a BEP032 structure
+    """
+    if not HAVE_PANDAS:
+        raise ImportError('Extraction of bep032 structure from csv requires pandas.')
+
+    df = pd.read_csv(csv_file, dtype=str)
+
+    # ensure all fields contain information
+    if df.isnull().values.any():
+        raise ValueError(f'Csv file contains empty cells.')
+
+    # standardizing column labels
+    # df = df.rename(columns=LABEL_MAPPING)
+
+    # Check if the header contains all required names
+    if not set(ESSENTIAL_CSV_COLUMNS).issubset(df.columns):
+        raise ValueError(f'Csv file ({csv_file}) does not contain required information '
+                         f'({ESSENTIAL_CSV_COLUMNS}). '
+                         f'Accepted column names are specified in the BEP.')
+
+    return df
+
+
+def generate_struct(csv_file, pathToDir):
+    """
+    Create structure with csv file given in argument
+    This file must contain a header row specifying the provided data. Accepted titles are
+    defined in the BEP.
+    Essential information of the following attributes needs to be present.
+    Essential columns are 'sub_id' and 'ses_id'.
+
+    Parameters
+    ----------
+    csv_file: str
+        Csv file that contains a list of directories to create.
+    pathToDir: str
+        Path to directory where the directories will be created.
+    """
+
+    df = extract_structure_from_csv(csv_file)
+
+    df = df[ESSENTIAL_CSV_COLUMNS]
+    test_data_files = [Path('empty_ephys.nix')]
+    for f in test_data_files:
+        f.touch()
+
+    for session_kwargs in df.to_dict('index').values():
+        session = BEP032TemplateData(**session_kwargs)
+        session.basedir = pathToDir
+        session.generate_structure()
+        session.register_data_files(*test_data_files)
+        session.organize_data_files(mode='copy')
+        session.generate_all_metadata_files()
+
+    # cleanup
+    for f in test_data_files:
+        if f.exists():
+            f.unlink()
+
+
+def main():
+    """
+
+    Notes
+    ----------
+
+    Usage via command line: BEP032Generator.py [-h] pathToCsv pathToDir
+
+    positional arguments:
+        pathToCsv   Path to your csv file
+
+        pathToDir   Path to your folder
+
+    optional arguments:
+        -h, --help  show this help message and exit
+    """
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('pathToCsv', help='Path to your csv file')
+    parser.add_argument('pathToDir', help='Path to your folder')
+
+    # Create two argument groups
+
+    args = parser.parse_args()
+
+    # Check if directory exists
+    if not os.path.isdir(args.pathToDir):
+        print('Directory does not exist:', args.pathToDir)
+        exit(1)
+    generate_struct(args.pathToCsv, args.pathToDir)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/diglab2ando.py b/diglab2bids.py
similarity index 81%
rename from diglab2ando.py
rename to diglab2bids.py
index 9d4d13c..7b80771 100644
--- a/diglab2ando.py
+++ b/diglab2bids.py
@@ -1,13 +1,15 @@
 import re
 import pathlib
 import json
-from ando.tools.generator.AnDOGenerator import AnDOData
+from bep032tools.generator.BEP032Generator import BEP032Data
 from redcap_bridge.server_interface import download_records
-from ando.checker import is_valid
+from bep032tools.validator.BEP032Validator import is_valid
+
 
 config_file = pathlib.Path(__file__).parent / 'config.json'
 project_name = 'SimpleProject'
 
+
 with open(config_file) as f:
     conf = json.load(f)
 
@@ -17,6 +19,7 @@
 if not OUTPUT_FOLDER.exists():
     OUTPUT_FOLDER.mkdir()
 
+
 def get_metadata(conf, format):
     """
     Fetch all recorded metadata from the server
@@ -52,13 +55,16 @@ def convert_to_bids(records, OUTPUT_FOLDER):
     """
     for record_dict in records:
         sub_id, ses_id = get_sub_ses_ids(record_dict)
-        gen = AnDOData(sub_id, ses_id, modality='ephys')
-        gen.register_data_files(get_data_file())
+        gen = BEP032Data(sub_id, ses_id, modality='ephys')
+        files = gen.generate_data_files()
+        gen.register_data_files(files)
         gen.basedir = OUTPUT_FOLDER
         gen.generate_structure()
-        gen.generate_data_files(mode='move')
+        files = gen.generate_metadata_files()
+        gen.register_metadata_files(files)
+
 
-        generate_metadata_files(record_dict, gen.get_data_folder())
+        # generate_metadata_files(record_dict, gen.get_data_folder())
 
 
 def get_sub_ses_ids(record_dict):
@@ -89,47 +95,6 @@ def get_sub_ses_ids(record_dict):
     else:
         raise Exception("Record dict must only contain alphanumeric characters")
 
-def get_data_file():
-    """
-    Parameters
-    ----------
-
-    Returns
-    ----------
-    TODO: this needs to be replaced by a project-specific functions that converts the data to nix and provides the path to the nix file
-    """
-
-    dummy_nix_file = OUTPUT_FOLDER / 'dummy_file.nix'
-    if not dummy_nix_file.exists():
-        dummy_nix_file.touch()
-    return dummy_nix_file
-
-
-def generate_metadata_files(record_dict, save_dir):
-    """
-
-    Parameters
-    ----------
-    record_dict:
-    save_dir:
-
-    Returns
-    ----------
-
-    TODO: this needs to generate the basic BIDS metadata files and
-    """
-
-    # these can then be rearranged into the right location by the ando generator
-    filename = f'sub-{record_dict["guid"]}_ses-{record_dict["date"]}_ephys.json'
-    with open(save_dir / filename, 'w') as f:
-        json.dump(record_dict, f)
-
-    metadata_filenames = ["dataset_description.json","tasks.json","participants.json"
-                          "tasks.csv", "participants.csv"]
-    for filename in metadata_filenames:
-        with open(save_dir / filename, 'w') as f:
-            pass
-
 
 if __name__ == '__main__':
     # json way of the world
     rec = get_metadata(conf, format='json')
     if not rec:
         raise ValueError(f'No records found for project {project_name}.')
     convert_to_bids(rec, OUTPUT_FOLDER)
-    ando.is_valid(OUTPUT_FOLDER)
+    is_valid(OUTPUT_FOLDER)
diff --git a/tests/test_BEP032Templater.py b/tests/test_BEP032Templater.py
new file mode 100644
index 0000000..31bc87a
--- /dev/null
+++ b/tests/test_BEP032Templater.py
@@ -0,0 +1,175 @@
+import os
+import unittest
+from pathlib import Path
+from utils import initialize_test_directory, test_directory
+from BEP032Templater import BEP032TemplateData
+
+
+class Test_BEP032TemplateData(unittest.TestCase):
+
+    def setUp(self):
+        test_dir = Path(initialize_test_directory(clean=True))
+        self.sub_id = 'sub5'
+        self.ses_id = 'ses1'
+        self.tasks = None
+        self.runs = None
+
+        sources = test_dir / 'sources'
+        sources.mkdir()
+        project = test_dir / 'project-A'
+        project.mkdir()
+        self.basedir = project
+
+        d = BEP032TemplateData(self.sub_id, self.ses_id)
+        d.basedir = project
+
+        self.bep032_data = d
+        prefix = f'sub-{self.sub_id}_ses-{self.ses_id}'
+        self.test_data_files = [sources / (prefix + '_ephy.nix'),
+                                sources / (prefix + '_ephy.nwb')]
+        self.test_mdata_files = [sources / 'dataset_description.json',
+                                 sources / (prefix + '_probes.tsv'),
+                                 sources / (prefix + '_contacts.json')]
+
+        for f in self.test_mdata_files + self.test_data_files:
+            f.touch()
+
+    def test_get_data_folder(self):
+        df = self.bep032_data.get_data_folder()
+        self.assertTrue(df)
+
+        df_abs = self.bep032_data.get_data_folder('absolute')
+        df_local = self.bep032_data.get_data_folder('local')
+
+        self.assertTrue(df_local)
+        self.assertTrue(str(df_abs).endswith(str(df_local)))
+
+    def test_generate_structure(self):
+        self.bep032_data.generate_structure()
+        df = self.bep032_data.get_data_folder()
+        self.assertTrue(df.exists())
+
+    def test_data_files(self):
+        self.bep032_data.generate_structure()
+        self.bep032_data.register_data_files(*self.test_data_files)
+        self.bep032_data.organize_data_files()
+
+        session_folder = self.bep032_data.get_data_folder()
+        self.assertTrue(session_folder.exists())
+        data_files = list(session_folder.glob('*.nix'))
+        data_files += list(session_folder.glob('*.nwb'))
+        self.assertEqual(len(self.test_data_files), len(data_files))
+        for data_file in data_files:
+            self.assertTrue(data_file.name.find("_ephys"))
+
+    def test_data_files_complex(self):
+        self.bep032_data.generate_structure()
+        nix_files = [self.test_data_files[0]] * 3
+        runs = ['run1', 'run2']
+        tasks = ['task1', 'task2']
+        for run in runs:
+            for task in tasks:
+                self.bep032_data.register_data_files(*nix_files,
+                                                     run=run, task=task)
+
+        self.bep032_data.organize_data_files()
+
+        session_folder = self.bep032_data.get_data_folder()
+        self.assertTrue(session_folder.exists())
+        data_files = list(session_folder.glob('*.nix'))
+        self.assertEqual(len(data_files), len(runs) * len(tasks) * len(nix_files))
+
+        for data_file in data_files:
+            self.assertTrue(data_file.name.find("_ephys"))
+
+        for run in runs:
+            exp = len(tasks) * len(nix_files)
+            files = list(session_folder.glob(f'*_run-{run}*.nix'))
+            self.assertEqual(len(files), exp)
+
+        for task in tasks:
+            exp = len(runs) * len(nix_files)
+            files = list(session_folder.glob(f'*_task-{task}*.nix'))
+            self.assertEqual(len(files), exp)
+
+        for split in range(len(nix_files)):
+            exp = len(runs) * len(tasks)
+            files = list(session_folder.glob(f'*_split-{split}*.nix'))
+            self.assertEqual(len(files), exp)
+
+    def test_data_files_same_key(self):
+        self.bep032_data.generate_structure()
+        nix_files = [self.test_data_files[0]]
+        run = 'run1'
+        task = 'task1'
+
+        self.bep032_data.register_data_files(*nix_files, run=run, task=task)
+        # register more data files in a second step
+        self.bep032_data.register_data_files(*nix_files, run=run, task=task)
+
+        self.bep032_data.organize_data_files()
+
+        session_folder = self.bep032_data.get_data_folder()
+        self.assertTrue(session_folder.exists())
+        data_files = list(session_folder.glob('*.nix'))
+        self.assertEqual(len(data_files), 2)
+
+        for data_file in data_files:
+            self.assertTrue(data_file.name.find(f"_task-{task}_run-{run}_split-"))
+
+    def test_implemented_error_raised(self):
+        path = ""
+        self.test_generate_structure()
+        self.bep032_data.register_data_files(*self.test_data_files)
+        self.bep032_data.organize_data_files()
+        self.bep032_data.generate_all_metadata_files()
+
+    def tearDown(self):
+        initialize_test_directory(clean=True)
+
+
+class Test_ReadCsv(unittest.TestCase):
+
+    def setUp(self):
+        csv_filename = generate_simple_csv_file()
+        self.csv_file = csv_filename
+
+    def test_read_csv(self):
+        df = extract_structure_from_csv(self.csv_file)
+        expected_headers = ['sub_id', 'ses_id']
+        self.assertListEqual(expected_headers, list(df))
+
+
+class Test_GenerateStruct(unittest.TestCase):
+
+    def setUp(self):
+        initialize_test_directory(clean=True)
+        csv_filename = generate_simple_csv_file()
+        self.csv_file = csv_filename
+
+    def test_generate_example_structure(self):
+        generate_struct(self.csv_file, test_directory)
+        # extract all paths that exist in the test directory
+        existing_paths = [p[0] for p in os.walk(test_directory)]
+
+        # find path that is corresponding to each line of the csv file
+        with open(self.csv_file) as f:
+            header = f.readline()
+            # iterate through sessions
+            for line in f.readlines():
+                found_path = False
+                for existing_path in existing_paths:
+                    if all(key in existing_path for key in line.strip().split(',')):
+                        found_path = True
+                        break
+                if not found_path:
+                    print(line.strip().split(','))
+
+                self.assertTrue(found_path)
+
+    def doCleanups(self):
+        initialize_test_directory(clean=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_files/record.csv b/tests/test_files/record.csv
new file mode 100644
index 0000000..0259a51
--- /dev/null
+++ b/tests/test_files/record.csv
@@ -0,0 +1,3 @@
+record_id,redcap_repeat_instrument,redcap_repeat_instance,redcap_survey_identifier,diglabform_timestamp,ethical_protocol_id,user,date,exp_name,guid,ses_number,modality___behaviour_eye,modality___behaviour_hand,modality___single_electrode,modality___multi_electrode,modality___emg,modality___int,modality___vsdi,modality___ecog,modality___seeg,stimulation,weight,comments_exp,comments_setup,comments_subject,data_quality,incomplete,subject_behaviour___very_motivated,subject_behaviour___working,subject_behaviour___thirsty,subject_behaviour___sleepy,subject_behaviour___unmotivated,subject_behaviour___agitated,subject_behaviour_multi,time_last_trial,subject_behaviour_2___agitated,subject_behaviour_2___resting,subject_behaviour_2___sleeping,subject_behaviour_2___working,reward_fluid,reward_fluid_additional,reward_fluid_type___water,reward_fluid_type___apple,reward_fluid_type___mixed,reward_fluid_type___other,reward_fluid_type_other,reward_other___fruit_fresh,reward_other___fruit_dry,reward_other___seeds,reward_other___treats,reward_other___insects,special_event_0,special_event_time_0,special_event_1,special_event_time_1,special_event_2,special_event_time_2,data_filename,exp_duration,task_name,task_mode,stim_set_primavoice,stim_set_morphing,stim_set_formant_saliency,stim_set_identity,stim_type_other,tone_frequency,tone_duration,stim_duration,stim_attenuation,stim_isi,stim_pretrial_delay,time_out_delay,reaction_timeout,min_n,max_n,reward_ratio,reward_duration,reward_volume,trial_count_correct,avg_stim_occurrence_correct,avg_stim_occurrence_all,tot_pres_stimuli_primavoice_correct,tot_pres_stimuli_primavoice_all,start_time_primavoice,tot_pres_stimuli_identity_correct,tot_pres_stimuli_identity_all,start_time_identity,tot_pres_stimuli_morphing_correct,tot_pres_stimuli_morphing_all,start_time_morphing,tot_pres_stimuli_formant_saliency_correct,tot_pres_stimuli_formant_saliency_all,start_time_formant_saliency,tot_pres_stimuli_bpn_correct,tot_pres_stimuli_bpn_all,start_time_bpn,tot_pres_stimuli_custom_stim_correct,tot_pres_stimuli_custom_stim_all,start_time_custom_stim,sua_1,sua_2,sua_3,sua_4,sua_5,sua_6,sua_7,sua_8,sua_9,sua_10,sua_11,sua_12,sua_13,sua_14,sua_15,sua_16,sua_17,sua_18,sua_19,sua_20,sua_21,sua_22,sua_23,sua_24,sua_25,sua_26,sua_27,sua_28,sua_29,sua_30,sua_31,sua_32,sua_33,sua_34,sua_35,sua_36,sua_37,sua_38,sua_39,sua_40,sua_41,sua_42,sua_43,sua_44,sua_45,sua_46,sua_47,sua_48,sua_49,sua_50,sua_51,sua_52,sua_53,sua_54,sua_55,sua_56,sua_57,sua_58,sua_59,sua_60,sua_61,sua_62,sua_63,sua_64,sua_65,sua_66,sua_67,sua_68,sua_69,sua_70,sua_71,sua_72,sua_73,sua_74,sua_75,sua_76,sua_77,sua_78,sua_79,sua_80,sua_81,sua_82,sua_83,sua_84,sua_85,sua_86,sua_87,sua_88,sua_89,sua_90,sua_91,sua_92,sua_93,sua_94,sua_95,sua_96,sua_isolated_1___isolated,sua_isolated_2___isolated,sua_isolated_3___isolated,sua_isolated_4___isolated,sua_isolated_5___isolated,sua_isolated_6___isolated,sua_isolated_7___isolated,sua_isolated_8___isolated,sua_isolated_9___isolated,sua_isolated_10___isolated,sua_isolated_11___isolated,sua_isolated_12___isolated,sua_isolated_13___isolated,sua_isolated_14___isolated,sua_isolated_15___isolated,sua_isolated_16___isolated,sua_isolated_17___isolated,sua_isolated_18___isolated,sua_isolated_19___isolated,sua_isolated_20___isolated,sua_isolated_21___isolated,sua_isolated_22___isolated,sua_isolated_23___isolated,sua_isolated_24___isolated,sua_isolated_25___isolated,sua_isolated_26___isolated,sua_isolated_27___isolated,sua_isolated_28___isolated,sua_isolated_29___isolated,sua_isolated_30___isolated,sua
_isolated_31___isolated,sua_isolated_32___isolated,sua_isolated_33___isolated,sua_isolated_34___isolated,sua_isolated_35___isolated,sua_isolated_36___isolated,sua_isolated_37___isolated,sua_isolated_38___isolated,sua_isolated_39___isolated,sua_isolated_40___isolated,sua_isolated_41___isolated,sua_isolated_42___isolated,sua_isolated_43___isolated,sua_isolated_44___isolated,sua_isolated_45___isolated,sua_isolated_46___isolated,sua_isolated_47___isolated,sua_isolated_48___isolated,sua_isolated_49___isolated,sua_isolated_50___isolated,sua_isolated_51___isolated,sua_isolated_52___isolated,sua_isolated_53___isolated,sua_isolated_54___isolated,sua_isolated_55___isolated,sua_isolated_56___isolated,sua_isolated_57___isolated,sua_isolated_58___isolated,sua_isolated_59___isolated,sua_isolated_60___isolated,sua_isolated_61___isolated,sua_isolated_62___isolated,sua_isolated_63___isolated,sua_isolated_64___isolated,sua_isolated_65___isolated,sua_isolated_66___isolated,sua_isolated_67___isolated,sua_isolated_68___isolated,sua_isolated_69___isolated,sua_isolated_70___isolated,sua_isolated_71___isolated,sua_isolated_72___isolated,sua_isolated_73___isolated,sua_isolated_74___isolated,sua_isolated_75___isolated,sua_isolated_76___isolated,sua_isolated_77___isolated,sua_isolated_78___isolated,sua_isolated_79___isolated,sua_isolated_80___isolated,sua_isolated_81___isolated,sua_isolated_82___isolated,sua_isolated_83___isolated,sua_isolated_84___isolated,sua_isolated_85___isolated,sua_isolated_86___isolated,sua_isolated_87___isolated,sua_isolated_88___isolated,sua_isolated_89___isolated,sua_isolated_90___isolated,sua_isolated_91___isolated,sua_isolated_92___isolated,sua_isolated_93___isolated,sua_isolated_94___isolated,sua_isolated_95___isolated,sua_isolated_96___isolated,diglabform_complete +1,diglabform,1,,2022-01-14 17:55:00,0000_2000000000000000_v1,sprenger.j,2021-12-07,myexperiment,AAA11111111,1,0,1,0,1,0,0,0,0,0,,5.7,,,,good,,1,0,0,0,0,0,,,0,0,0,0,115,0,0,0,1,0,,1,1,0,1,0,,,,,,,data-211207-142257,46,mytask1,task_active,training96,,,,,1000,500,500,0,250,1500,4000,250,3,7,2,340,0.48,306,,,1532,1773,,,,,,,,,,,,,,,,,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 +2,diglabform,1,,2022-01-14 18:38:50,0000_2000000000000000_v2,sprenger.j,2021-12-08,myexperiment,AAA11111111,1,0,1,0,1,0,0,0,0,0,,6.7,,,,good,,1,0,0,0,0,0,,,0,0,0,0,130,0,0,0,1,0,,1,1,0,1,0,,,,,,,data-211208-142020,44,mytask2,task_active,testing,,,,,1000,500,500,0,250,1500,4000,250,3,7,2,340,0.48,335,,,1692,1866,,,,,,,,,,,,,,,,,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..8cfcdcb --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,31 @@ +import os +import tempfile +import shutil +import pathlib +test_directory = pathlib.Path(tempfile.gettempdir()) / 'diglab2bids_testfiles' + + +def 
initialize_test_directory(clean=True):
+    """
+    Create main test folder if required
+
+    Parameters
+    ----------
+    clean: (bool)
+        Remove test folder first in case it exists.
+
+    Returns
+    -------
+    test_directory: (str)
+        path of the test directory
+    """
+    if clean and os.path.exists(test_directory):
+        shutil.rmtree(test_directory)
+
+    if not os.path.exists(test_directory):
+        os.mkdir(test_directory)
+        packaged_testfolder = pathlib.Path(__file__).parent / 'test_files'
+        shutil.copytree(packaged_testfolder, test_directory / 'test_files')
+
+    return test_directory
+

From 856906d69b391d2f0c35deba3fd02bf364a9019f Mon Sep 17 00:00:00 2001
From: sprenger
Date: Wed, 26 Jan 2022 15:59:41 +0100
Subject: [PATCH 02/13] continue developing Templater test

---
 tests/test_BEP032Templater.py | 199 ++++++++++++++++++++--------------
 1 file changed, 119 insertions(+), 80 deletions(-)

diff --git a/tests/test_BEP032Templater.py b/tests/test_BEP032Templater.py
index 31bc87a..81200c1 100644
--- a/tests/test_BEP032Templater.py
+++ b/tests/test_BEP032Templater.py
@@ -1,12 +1,12 @@
 import os
 import unittest
 from pathlib import Path
+import pandas as pd
 from utils import initialize_test_directory, test_directory
 from BEP032Templater import BEP032TemplateData
 
 
-class Test_BEP032TemplateData(unittest.TestCase):
-
+class Test_BEP032Templater(unittest.TestCase):
     def setUp(self):
         test_dir = Path(initialize_test_directory(clean=True))
         self.sub_id = 'sub5'
         self.ses_id = 'ses1'
         self.tasks = None
         self.runs = None
@@ -19,8 +19,10 @@ def setUp(self):
         project = test_dir / 'project-A'
         project.mkdir()
         self.basedir = project
+        self.diglab_dfs = pd.read_csv('test_files/record.csv', header=0)
+        self.diglab_dict = self.diglab_dfs.to_dict(orient='index')
 
-        d = BEP032TemplateData(self.sub_id, self.ses_id)
+        d = BEP032TemplateData(self.sub_id, self.ses_id, diglab_df=self.diglab_dict[0])
         d.basedir = project
 
         self.bep032_data = d
@@ -34,95 +36,132 @@ def setUp(self):
         for f in self.test_mdata_files + self.test_data_files:
             f.touch()
 
-    def test_get_data_folder(self):
-        df = self.bep032_data.get_data_folder()
-        self.assertTrue(df)
-
-        df_abs = self.bep032_data.get_data_folder('absolute')
-        df_local = self.bep032_data.get_data_folder('local')
-
-        self.assertTrue(df_local)
-        self.assertTrue(str(df_abs).endswith(str(df_local)))
-
-    def test_generate_structure(self):
-        self.bep032_data.generate_structure()
-        df = self.bep032_data.get_data_folder()
-        self.assertTrue(df.exists())
-
-    def test_data_files(self):
+    def test_generate_all_metadata(self):
         self.bep032_data.generate_structure()
         self.bep032_data.register_data_files(*self.test_data_files)
         self.bep032_data.organize_data_files()
 
-        session_folder = self.bep032_data.get_data_folder()
-        self.assertTrue(session_folder.exists())
-        data_files = list(session_folder.glob('*.nix'))
-        data_files += list(session_folder.glob('*.nwb'))
-        self.assertEqual(len(self.test_data_files), len(data_files))
-        for data_file in data_files:
-            self.assertTrue(data_file.name.find("_ephys"))
-
-    def test_data_files_complex(self):
-        self.bep032_data.generate_structure()
-        nix_files = [self.test_data_files[0]] * 3
-        runs = ['run1', 'run2']
-        tasks = ['task1', 'task2']
-        for run in runs:
-            for task in tasks:
-                self.bep032_data.register_data_files(*nix_files,
-                                                     run=run, task=task)
-
-        self.bep032_data.organize_data_files()
-
-        session_folder = self.bep032_data.get_data_folder()
-        self.assertTrue(session_folder.exists())
-        data_files = list(session_folder.glob('*.nix'))
-        self.assertEqual(len(data_files), len(runs) * len(tasks) * len(nix_files))
-
-        for data_file in data_files:
-            self.assertTrue(data_file.name.find("_ephys"))
-
-        for run in runs:
-            exp = len(tasks) * len(nix_files)
-            files = list(session_folder.glob(f'*_run-{run}*.nix'))
-            self.assertEqual(len(files), exp)
+        self.bep032_data.generate_all_metadata_files()
 
-        for task in tasks:
-            exp = len(runs) * len(nix_files)
-            files = list(session_folder.glob(f'*_task-{task}*.nix'))
-            self.assertEqual(len(files), exp)
 
-        for split in range(len(nix_files)):
-            exp = len(runs) * len(tasks)
-            files = list(session_folder.glob(f'*_split-{split}*.nix'))
-            self.assertEqual(len(files), exp)
+class Test_BEP032TemplateData(unittest.TestCase):
 
-    def test_data_files_same_key(self):
-        self.bep032_data.generate_structure()
-        nix_files = [self.test_data_files[0]]
-        run = 'run1'
-        task = 'task1'
+    def setUp(self):
+        test_dir = Path(initialize_test_directory(clean=True))
+        self.sub_id = 'sub5'
+        self.ses_id = 'ses1'
+        self.tasks = None
+        self.runs = None
 
-        self.bep032_data.register_data_files(*nix_files, run=run, task=task)
-        # register more data files in a second step
-        self.bep032_data.register_data_files(*nix_files, run=run, task=task)
+        sources = test_dir / 'sources'
+        sources.mkdir()
+        project = test_dir / 'project-A'
+        project.mkdir()
+        self.basedir = project
 
-        self.bep032_data.organize_data_files()
+        d = BEP032TemplateData(self.sub_id, self.ses_id)
+        d.basedir = project
 
-        session_folder = self.bep032_data.get_data_folder()
-        self.assertTrue(session_folder.exists())
-        data_files = list(session_folder.glob('*.nix'))
-        self.assertEqual(len(data_files), 2)
+        self.bep032_data = d
+        prefix = f'sub-{self.sub_id}_ses-{self.ses_id}'
+        self.test_data_files = [sources / (prefix + '_ephy.nix'),
+                                sources / (prefix + '_ephy.nwb')]
+        self.test_mdata_files = [sources / 'dataset_description.json',
+                                 sources / (prefix + '_probes.tsv'),
+                                 sources / (prefix + '_contacts.json')]
 
-        for data_file in data_files:
-            self.assertTrue(data_file.name.find(f"_task-{task}_run-{run}_split-"))
+        for f in self.test_mdata_files + self.test_data_files:
+            f.touch()
 
-    def test_implemented_error_raised(self):
-        path = ""
-        self.test_generate_structure()
-        self.bep032_data.register_data_files(*self.test_data_files)
-        self.bep032_data.organize_data_files()
-        self.bep032_data.generate_all_metadata_files()
+    # def test_get_data_folder(self):
+    #     df = self.bep032_data.get_data_folder()
+    #     self.assertTrue(df)
+    #
+    #     df_abs = self.bep032_data.get_data_folder('absolute')
+    #     df_local = self.bep032_data.get_data_folder('local')
+    #
+    #     self.assertTrue(df_local)
+    #     self.assertTrue(str(df_abs).endswith(str(df_local)))
+    #
+    # def test_generate_structure(self):
+    #     self.bep032_data.generate_structure()
+    #     df = self.bep032_data.get_data_folder()
+    #     self.assertTrue(df.exists())
+    #
+    # def test_data_files(self):
+    #     self.bep032_data.generate_structure()
+    #     self.bep032_data.register_data_files(*self.test_data_files)
+    #     self.bep032_data.organize_data_files()
+    #
+    #     session_folder = self.bep032_data.get_data_folder()
+    #     self.assertTrue(session_folder.exists())
+    #     data_files = list(session_folder.glob('*.nix'))
+    #     data_files += list(session_folder.glob('*.nwb'))
+    #     self.assertEqual(len(self.test_data_files), len(data_files))
+    #     for data_file in data_files:
+    #         self.assertTrue(data_file.name.find("_ephys"))
+    #
+    # def test_data_files_complex(self):
+    #     self.bep032_data.generate_structure()
+    #     nix_files = [self.test_data_files[0]] * 3
+    #     runs = ['run1', 'run2']
+    #     tasks = ['task1', 'task2']
+    #     for run in runs:
+    #         for task in tasks:
+    #             self.bep032_data.register_data_files(*nix_files,
+    #                                                  run=run, task=task)
+    #
+    #     self.bep032_data.organize_data_files()
+    #
+    #     session_folder = self.bep032_data.get_data_folder()
+    #     self.assertTrue(session_folder.exists())
+    #     data_files = list(session_folder.glob('*.nix'))
+    #     self.assertEqual(len(data_files), len(runs) * len(tasks) * len(nix_files))
+    #
+    #     for data_file in data_files:
+    #         self.assertTrue(data_file.name.find("_ephys"))
+    #
+    #     for run in runs:
+    #         exp = len(tasks) * len(nix_files)
+    #         files = list(session_folder.glob(f'*_run-{run}*.nix'))
+    #         self.assertEqual(len(files), exp)
+    #
+    #     for task in tasks:
+    #         exp = len(runs) * len(nix_files)
+    #         files = list(session_folder.glob(f'*_task-{task}*.nix'))
+    #         self.assertEqual(len(files), exp)
+    #
+    #     for split in range(len(nix_files)):
+    #         exp = len(runs) * len(tasks)
+    #         files = list(session_folder.glob(f'*_split-{split}*.nix'))
+    #         self.assertEqual(len(files), exp)
+    #
+    # def test_data_files_same_key(self):
+    #     self.bep032_data.generate_structure()
+    #     nix_files = [self.test_data_files[0]]
+    #     run = 'run1'
+    #     task = 'task1'
+    #
+    #     self.bep032_data.register_data_files(*nix_files, run=run, task=task)
+    #     # register more data files in a second step
+    #     self.bep032_data.register_data_files(*nix_files, run=run, task=task)
+    #
+    #     self.bep032_data.organize_data_files()
+    #
+    #     session_folder = self.bep032_data.get_data_folder()
+    #     self.assertTrue(session_folder.exists())
+    #     data_files = list(session_folder.glob('*.nix'))
+    #     self.assertEqual(len(data_files), 2)
+    #
+    #     for data_file in data_files:
+    #         self.assertTrue(data_file.name.find(f"_task-{task}_run-{run}_split-"))
+    #
+    # def test_implemented_error_raised(self):
+    #     path = ""
+    #     self.test_generate_structure()
+    #     self.bep032_data.register_data_files(*self.test_data_files)
+    #     self.bep032_data.organize_data_files()
+    #     self.bep032_data.generate_all_metadata_files()
 
     def tearDown(self):
         initialize_test_directory(clean=True)

From cdba3001cd8f60a524506aeac7c0267edcb9a5c0 Mon Sep 17 00:00:00 2001
From: sprenger
Date: Fri, 28 Jan 2022 16:51:58 +0100
Subject: [PATCH 03/13] improve pandas usage and data extraction

---
 BEP032Templater.py            | 86 +++++++++++++++++++----------------
 tests/test_BEP032Templater.py | 47 +++++++++++--------
 2 files changed, 73 insertions(+), 60 deletions(-)

diff --git a/BEP032Templater.py b/BEP032Templater.py
index 03d101c..3795b3f 100644
--- a/BEP032Templater.py
+++ b/BEP032Templater.py
@@ -53,9 +53,8 @@ def __init__(self, sub_id, ses_id, diglab_df=None, project_name=None):
         self.project_name = project_name
 
     def generate_metadata_file_participants(self, output):
-        assert self.sub_id == self.diglab_df['guid']
-        participant_df = pd.DataFrame([['sub-' + self.sub_id]],
-                                      columns=['participant_id'])
+        assert self.sub_id == self.diglab_df['guid'].values[0]
+        participant_df = pd.DataFrame([['sub-' + self.sub_id]], columns=['participant_id'])
         if not output.with_suffix('.tsv').exists():
             save_tsv(participant_df, output)
 
@@ -68,12 +67,12 @@ def generate_metadata_file_dataset_description(self, output):
             "Name": self.project_name,
             "BIDSVersion": "1.6.0",
             "License": "CC BY 4.0",
-            "Authors": [self.diglab_df['user']],
+            "Authors": self.diglab_df['user'].to_list(),
             "Acknowledgements": "TBA",
             "HowToAcknowledge": "TBA",
             "Funding": ["TBA"],
             "ReferencesAndLinks": "TBA",
-            "EthicsApprovals": [self.diglab_df['ethical_protocol_id']]
+            "EthicsApprovals": self.diglab_df['ethical_protocol_id'].to_list()
         }
         save_json(dataset_dict, output)
 
@@ -118,25 +117,26 @@ def generate_metadata_file_contacts(self, output):
                      'shape', 'contact_size'])
         save_tsv(contact_df, output)
 
+    def _get_compressed_choices(self, question_label, active_value=1):
+        # extract columns belonging to this question (if it's a multiple choice question)
+        question_df = self.diglab_df.filter(regex=f'{question_label}___\w', axis=1)
+        # shorten column names and only use choices as column labels
+        question_df.columns = question_df.columns.str.replace(f'{question_label}___', '')
+        # extract choices that contain 'active' (selected) value
+        choices = question_df.columns[(question_df.values==active_value)[0]].to_list()
+        return choices
+
     def generate_metadata_file_ephys(self, output):
         # extract selected modalities
-        modes = self.diglab_df.filter(regex='modality___\w', axis=1)
-        modalities = modes.columns[modes==1].str.replace('modality___', '')
-
-        trialbeh = self.diglab_df.filter(regex='subject_behaviour___\w', axis=1)
-        trialbeh = trialbeh.columns[trialbeh==1].str.replace('subject_behaviour___', '')
-
-        posttrialbeh = self.diglab_df.filter(regex='subject_behaviour_2___\w', axis=1)
-        posttrialbeh = trialbeh.columns[posttrialbeh==1].str.replace('subject_behaviour_2___', '')
-
-        rewardfluidtype = self.diglab_df.filter(regex='reward_fluid_type___\w', axis=1)
-        rewardfluidtype = trialbeh.columns[rewardfluidtype==1].str.replace('reward_fluid_type___', '')
+        modalities = self._get_compressed_choices('modality')
+        trialbeh = self._get_compressed_choices('subject_behaviour')
+        posttrialbeh = self._get_compressed_choices('subject_behaviour_2')
+        rewardfluidtype = self._get_compressed_choices('reward_fluid_type')
 
-        if self.diglab_df['reward_fluid_type_other']:
-            rewardfluidtype += [self.diglab_df['reward_fluid_type_other']]
+        if self.diglab_df['reward_fluid_type_other'].values[0]:
+            rewardfluidtype += self.diglab_df['reward_fluid_type_other'].values
 
-        rewardothertype = self.diglab_df.filter(regex='reward_other___\w', axis=1)
-        rewardothertype = trialbeh.columns[rewardothertype==1].str.replace('reward_other___', '')
+        rewardothertype = self._get_compressed_choices('reward_other')
 
 
         ephys_dict = {
             # "PowerLineFrequency": 50,
             # "PowerLineFrequencyUnit": "Hz",
             # "Manufacturer": "OpenEphys",
             # "ManufacturerModelName": "OpenEphys Starter Kit",
             # "ManufacturerModelVersion": "",
             # "SamplingFrequency": 30000,
             # "SamplingFrequencyUnit": "Hz",
             # "Location": "Institut de Neurosciences de la Timone, Faculté de Médecine, 27, "
             #             "Boulevard Jean Moulin, 13005 Marseille - France",
             # "Software": "Cerebus",
             # "SoftwareVersion": "1.5.1",
-            "Creator": self.diglab_df['user'],
+            "Creator": self.diglab_df['user'].values[0],
             # "Maintainer": "John Doe jr.",
             # "Procedure": {
             #     "Pharmaceuticals": {
             #         "isoflurane": {
             #             "PharmaceuticalName": "isoflurane",
             #             "PharmaceuticalDoseAmount": 50,
             #             "PharmaceuticalDoseUnit": "ug/kg/min",
             #         },
             #         "ketamine": {
             #             "PharmaceuticalName": "ketamine",
             #             "PharmaceuticalDoseAmount": 0.1,
             #             "PharmaceuticalDoseUnit": "ug/kg/min",
             #         },
             #     },
             # },
-            "Comments": self.diglab_df['comments_exp'],
-            "SessionNumber": self.diglab_df['ses_number'],
+            "Comments": self.diglab_df['comments_exp'].values[0],
+            "SessionNumber": self.diglab_df['ses_number'].values[0],
             "Subject": {
-                "Weight": self.diglab_df['weight'],
+                "Weight": self.diglab_df['weight'].values[0],
                 "WeightUnit": 'kg',
-                "Comments": self.diglab_df['comments_subject'],
+                "Comments": self.diglab_df['comments_subject'].values[0],
                 "SubjectBehaviour": trialbeh,
                 "PostTrialSubjectBehaviour": posttrialbeh,
             },
             "SpecialEvents": {},
             "Modalities": modalities,
             "Setup": {
-                "Comments": self.diglab_df['comments_setup']
+                "Comments": self.diglab_df['comments_setup'].values[0]
             },
             "Rewards": {
                 "FluidType": rewardfluidtype,
                 "OtherType": rewardothertype,
             },
             "DigLab": {
-                "record_id": self.diglab_df['record_id'],
-                "diglab_version": self.diglab_df['diglab_version'],
-                "redcap_form_version": self.diglab_df['redcap_form_version'],
+                "record_id": self.diglab_df['record_id'].values[0],
+                # TODO: Fix test dataset
+                # "diglab_version": self.diglab_df['provenance_diglabtools_version'].values[0],
+                # "redcap_form_version": self.diglab_df['redcap_form_version'].values[0],
             }
         }
 
         for id in range(3):
-            if self.diglab_df[f'special_event_{id}']:
+            if self.diglab_df[f'special_event_{id}'].values[0]:
                 ephys_dict["SpecialEvents"][id] = {
-                    "Comment": self.diglab_df[f'special_event_{id}'],
-                    "Time": self.diglab_df[f'special_event_time_{id}']
+                    "Comment": self.diglab_df[f'special_event_{id}'].values[0],
+                    "Time": self.diglab_df[f'special_event_time_{id}'].values[0]
                 }
 
         save_json(ephys_dict, output)
 
     def generate_metadata_file_scans(self, output):
         # extract data quality value
-        qualities = self.diglab_df.filter(regex='data_quality___\w', axis=1)
-        quality = qualities.columns[qualities==1].str.replace('quality___', '')
+        quality = self._get_compressed_choices('data_quality')
 
         for key in self.data.keys():
             filename = f'ephys/sub-{self.sub_id}_ses-{self.ses_id}'
             if key:
                 filename += key
             filename += '.nix'
-            runs_df = pd.DataFrame([
-                [filename, self.diglab_df['date']], self.diglab_df['exp_name'],
-                self.diglab_df['stimulation___yes'],
-                self.diglab_df['subject_behaviour_multi___yes'], self.diglab_df['time_last_trial'],
-                quality, self.diglab_df['incomplete_session___yes'],
-                self.diglab_df['reward_fluid'], 'ml', self.diglab_df['reward_fluid_additional']
+            runs_df = pd.DataFrame([[
+                filename,
+                self.diglab_df['date'].values[0]],
+                self.diglab_df['exp_name'].values[0],
+                self.diglab_df['stimulation___yes'].values[0],
+                self.diglab_df['subject_behaviour_multi___yes'].values[0],
+                self.diglab_df['time_last_trial'].values[0],
+                quality,
+                self.diglab_df['incomplete_session___yes'].values[0],
+                self.diglab_df['reward_fluid'].values[0],
+                'ml',
+                self.diglab_df['reward_fluid_additional'].values[0]
             ],
                 columns=['filename', 'acq_date', 'exp_name', 'stimulation', 'post_trial_data',
                          'time_last_trial', 'data_quality', 'incomplete_session', 'fluid_reward',
                          'fluid_reward_unit', 'additional_fluid_reward'])
             save_tsv(runs_df, output)
diff --git a/tests/test_BEP032Templater.py b/tests/test_BEP032Templater.py
index 81200c1..6543ffe 100644
--- a/tests/test_BEP032Templater.py
+++ b/tests/test_BEP032Templater.py
@@ -9,8 +9,6 @@ class Test_BEP032Templater(unittest.TestCase):
     def setUp(self):
         test_dir = Path(initialize_test_directory(clean=True))
-        self.sub_id = 'sub5'
-        self.ses_id = 'ses1'
         self.tasks = None
         self.runs = None
 
@@ -19,29 +17,38 @@ def setUp(self):
         project = test_dir / 'project-A'
         project.mkdir()
         self.basedir = project
-        self.diglab_dfs = pd.read_csv('test_files/record.csv', header=0)
-        self.diglab_dict = self.diglab_dfs.to_dict(orient='index')
-
-        d = BEP032TemplateData(self.sub_id, self.ses_id, diglab_df=self.diglab_dict[0])
-        d.basedir = project
-
-        self.bep032_data = d
-        prefix = f'sub-{self.sub_id}_ses-{self.ses_id}'
-        self.test_data_files = [sources / (prefix + '_ephy.nix'),
-                                sources / (prefix + '_ephy.nwb')]
-        self.test_mdata_files = [sources / 'dataset_description.json',
+        self.diglab_dfs = pd.read_csv('test_files/record.csv', header=0, na_filter=False, dtype=str)
+        record_ids = self.diglab_dfs.index.values
+
+        self.bep032_data_list = []
+        self.test_data_files = []
+        self.test_mdata_files = []
+
+        for record_id in record_ids:
+            record = self.diglab_dfs.loc[[record_id]]
+            sub_id = record['guid'].values[0]
+            ses_id = record['exp_name'].values[0]
+            d = BEP032TemplateData(sub_id, ses_id, diglab_df=record)
+            d.basedir = project
+
+            self.bep032_data_list.append(d)
+            prefix = f'sub-{sub_id}_ses-{ses_id}'
+            self.test_data_files.append([sources / (prefix + '_ephy.nix'),
+                                         sources / (prefix + '_ephy.nwb')])
+            self.test_mdata_files.append([sources / 'dataset_description.json',
+                                          sources / (prefix + '_probes.tsv'),
+                                          sources / (prefix + '_contacts.json')])
 
-        for f in self.test_mdata_files + self.test_data_files:
-            f.touch()
+            for f in self.test_mdata_files[-1] + self.test_data_files[-1]:
+                f.touch()
 
     def test_generate_all_metadata(self):
-        self.bep032_data.generate_structure()
-        self.bep032_data.register_data_files(*self.test_data_files)
-        self.bep032_data.organize_data_files()
+        for i, data in enumerate(self.bep032_data_list):
+            data.generate_structure()
+            data.register_data_files(*self.test_data_files[i])
+            data.organize_data_files()
 
-        self.bep032_data.generate_all_metadata_files()
+            data.generate_all_metadata_files()

From 7865036b2c5c38bb9a9bb2bd17bfdfde25d082b8 Mon Sep 17 00:00:00 2001
From: sprenger
Date: Tue, 15 Feb 2022 15:57:23 +0100
Subject: [PATCH 04/13] [temp] fix headers of generated tsv dataframes

---
 BEP032Templater.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/BEP032Templater.py b/BEP032Templater.py
index 3795b3f..95fa14b 100644
--- a/BEP032Templater.py
+++ b/BEP032Templater.py
@@ -55,6 +55,7 @@ def __init__(self, sub_id, ses_id, diglab_df=None, project_name=None):
     def generate_metadata_file_participants(self, output):
         assert self.sub_id == self.diglab_df['guid'].values[0]
         participant_df = pd.DataFrame([['sub-' + self.sub_id]], columns=['participant_id'])
+        participant_df.set_index('participant_id', inplace=True)
         if not output.with_suffix('.tsv').exists():
             save_tsv(participant_df, output)
 
@@ -80,6 +81,7 @@ def generate_metadata_file_sessions(self, output):
         session_df = pd.DataFrame([
             ['ses-' + self.ses_id, '2009-06-15T13:45:30', '120']],
             columns=['session_id', 'acq_time', 'systolic_blood_pressure'])
+        session_df.set_index('session_id', inplace=True)
         if not output.with_suffix('.tsv').exists():
             save_tsv(session_df, output)
 
@@ -91,6 +93,7 @@ def generate_metadata_file_probes(self, output):
             ['t420b', 'tetrode', 7, 'iridium-oxide', 500, 0, 0, 'circle', 20]],
             columns=['probe_id', 'type', 'coordinate_space', 'material', 'x', 'y', 'z', 'shape',
                      'contact_size'])
+        probes_df.set_index('probe_id', inplace=True)
         save_tsv(probes_df, output)
 
@@ -102,6 +105,7 @@ def generate_metadata_file_channels(self, output):
             ],
             columns=['channel_id', 'contact_id', 'type', 'units', 'sampling_frequency', 'gain',
                      'status'])
+        channels_df.set_index(['channel_id', 'contact_id'], inplace=True)
         save_tsv(channels_df, output)
 
@@ -115,6 +119,7 @@ def generate_metadata_file_contacts(self, output):
             ],
             columns=['contact_id', 'probe_id', 'shank_id', 'impedance', 'material', 'x', 'y', 'z',
                      'shape', 'contact_size'])
+        contact_df.set_index(['contact_id', 'probe_id'], inplace=True)
         save_tsv(contact_df, output)
 
@@ -213,20 +218,21 @@ def generate_metadata_file_scans(self, output):
             filename += '.nix'
             runs_df = pd.DataFrame([[
                 filename,
-                self.diglab_df['date'].values[0]],
+                self.diglab_df['date'].values[0],
                 self.diglab_df['exp_name'].values[0],
-                self.diglab_df['stimulation___yes'].values[0],
-                self.diglab_df['subject_behaviour_multi___yes'].values[0],
+                self.diglab_df['stimulation'].values[0],
+                self.diglab_df['subject_behaviour_multi'].values[0],
                 self.diglab_df['time_last_trial'].values[0],
                 quality,
-                self.diglab_df['incomplete_session___yes'].values[0],
+                self.diglab_df['incomplete'].values[0],
                 self.diglab_df['reward_fluid'].values[0],
                 'ml',
                 self.diglab_df['reward_fluid_additional'].values[0]
-            ],
+            ]],
                 columns=['filename', 'acq_date', 'exp_name', 'stimulation', 'post_trial_data',
                          'time_last_trial', 'data_quality', 'incomplete_session', 'fluid_reward',
                          'fluid_reward_unit', 'additional_fluid_reward'])
+            runs_df.set_index('filename', inplace=True)
             save_tsv(runs_df, output)

From 72212c9983632ceb5fcb8f46648c994c1a8e5a4f Mon Sep 17 00:00:00 2001
From: sprenger
Date: Tue, 15 Feb 2022 15:59:03 +0100
Subject: [PATCH 05/13] [temp] fix tests for template - use date as session
 identifier to avoid ambiguities

---
 tests/test_BEP032Templater.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/test_BEP032Templater.py b/tests/test_BEP032Templater.py
index 6543ffe..f5bd0be 100644
--- a/tests/test_BEP032Templater.py
+++ b/tests/test_BEP032Templater.py
@@ -27,14 +27,13 @@ def setUp(self):
         for record_id in record_ids:
             record = self.diglab_dfs.loc[[record_id]]
             sub_id = record['guid'].values[0]
-            ses_id = record['exp_name'].values[0]
+            ses_id = record['date'].values[0]
             d = BEP032TemplateData(sub_id, ses_id, diglab_df=record)
             d.basedir = project
 
             self.bep032_data_list.append(d)
             prefix = f'sub-{sub_id}_ses-{ses_id}'
-            self.test_data_files.append([sources / (prefix + '_ephy.nix'),
-                                         sources / (prefix + '_ephy.nwb')])
+            self.test_data_files.append([sources / (prefix + '_ephy.nix')])
             self.test_mdata_files.append([sources / 'dataset_description.json',
                                           sources / (prefix + '_probes.tsv'),
                                           sources / (prefix + '_contacts.json')])
@@ -44,8 +43,10 @@ def setUp(self):
 
     def test_generate_all_metadata(self):
         for i, data in enumerate(self.bep032_data_list):
+            ses_number = self.diglab_dfs.loc[[i]]['ses_number'].values[0]
+            exp_name = self.diglab_dfs.loc[[i]]['exp_name'].values[0]
             data.generate_structure()
-            data.register_data_files(*self.test_data_files[i])
+            data.register_data_files(*self.test_data_files[i], run=ses_number, task=exp_name)
             data.organize_data_files()
 
             data.generate_all_metadata_files()

From d53f6243df6581afc5756e1e8cf73d76acc7c159 Mon Sep 17 00:00:00 2001
From: sprenger
Date: Tue, 15 Feb 2022 15:59:30 +0100
Subject: [PATCH 06/13] [temp] cleanup and comments

---
 BEP032Templater.py            | 206 ++++++++++++----------------------
 tests/test_BEP032Templater.py |  82 +++++++-------
 2 files changed, 113 insertions(+), 175 deletions(-)

diff --git a/BEP032Templater.py b/BEP032Templater.py
index 95fa14b..32192a4 100644
--- a/BEP032Templater.py
+++ b/BEP032Templater.py
@@ -252,137 +252,75 @@ def validate(self):
         bep032tools.validator.BEP032Validator.is_valid(self.basedir)
 
 
-def create_file(source, destination, mode):
-    """
-    Create a file at a destination location
-
-    Parameters
-    ----------
-    source: str
-        Source location of the file.
-    destination: str
-        Destination location of the file.
-    mode: str
-        File creation mode. Valid parameters are 'copy', 'link' and 'move'.
-
-    Raises
-    ----------
-    ValueError
-        In case of invalid creation mode.
-    """
-    if mode == 'copy':
-        shutil.copy(source, destination)
-    elif mode == 'link':
-        os.link(source, destination)
-    elif mode == 'move':
-        shutil.move(source, destination)
-    else:
-        raise ValueError(f'Invalid file creation mode "{mode}"')
-
-
-def extract_structure_from_csv(csv_file):
-    """
-    Load csv file that contains folder structure information and return it as pandas.DataFrame.
- - Parameters - ---------- - csv_file: str - The file to be loaded. - - Returns - ------- - pandas.dataframe - A dataframe containing the essential columns for creating an BEP032 structure - """ - if not HAVE_PANDAS: - raise ImportError('Extraction of bep032 structure from csv requires pandas.') - - df = pd.read_csv(csv_file, dtype=str) - - # ensure all fields contain information - if df.isnull().values.any(): - raise ValueError(f'Csv file contains empty cells.') - - # standardizing column labels - # df = df.rename(columns=LABEL_MAPPING) - - # Check is the header contains all required names - if not set(ESSENTIAL_CSV_COLUMNS).issubset(df.columns): - raise ValueError(f'Csv file ({csv_file}) does not contain required information ' - f'({ESSENTIAL_CSV_COLUMNS}). ' - f'Accepted column names are specified in the BEP.') - - return df - - -def generate_struct(csv_file, pathToDir): - """ - Create structure with csv file given in argument - This file must contain a header row specifying the provided data. Accepted titles are - defined in the BEP. - Essential information of the following attributes needs to be present. - Essential columns are 'sub_id' and 'ses_id'. - - Parameters - ---------- - csv_file: str - Csv file that contains a list of directories to create. - pathToDir: str - Path to directory where the directories will be created. - """ - - df = extract_structure_from_csv(csv_file) - - df = df[ESSENTIAL_CSV_COLUMNS] - test_data_files = [Path('empty_ephys.nix')] - for f in test_data_files: - f.touch() - - for session_kwargs in df.to_dict('index').values(): - session = BEP032TemplateData(**session_kwargs) - session.basedir = pathToDir - session.generate_structure() - session.register_data_files(*test_data_files) - session.organize_data_files(mode='copy') - session.generate_all_metadata_files() - - # cleanup - for f in test_data_files: - if f.exists(): - f.unlink() - - -def main(): - """ - - Notes - ---------- - - Usage via command line: BEP032Generator.py [-h] pathToCsv pathToDir - - positional arguments: - pathToCsv Path to your csv file - - pathToDir Path to your folder - - optional arguments: - -h, --help show this help message and exit - """ - - parser = argparse.ArgumentParser() - parser.add_argument('pathToCsv', help='Path to your csv file') - parser.add_argument('pathToDir', help='Path to your folder') - - # Create two argument groups - - args = parser.parse_args() - - # Check if directory exists - if not os.path.isdir(args.pathToDir): - print('Directory does not exist:', args.pathToDir) - exit(1) - generate_struct(args.pathToCsv, args.pathToDir) - - -if __name__ == '__main__': - main() +# +# def generate_struct(csv_file, pathToDir): +# """ +# Create structure with csv file given in argument +# This file must contain a header row specifying the provided data. Accepted titles are +# defined in the BEP. +# Essential information of the following attributes needs to be present. +# Essential columns are 'sub_id' and 'ses_id'. +# +# Parameters +# ---------- +# csv_file: str +# Csv file that contains a list of directories to create. +# pathToDir: str +# Path to directory where the directories will be created. 
+# """ +# +# df = extract_structure_from_csv(csv_file) +# +# df = df[ESSENTIAL_CSV_COLUMNS] +# test_data_files = [Path('empty_ephys.nix')] +# for f in test_data_files: +# f.touch() +# +# for session_kwargs in df.to_dict('index').values(): +# session = BEP032TemplateData(**session_kwargs) +# session.basedir = pathToDir +# session.generate_structure() +# session.register_data_files(*test_data_files) +# session.organize_data_files(mode='copy') +# session.generate_all_metadata_files() +# +# # cleanup +# for f in test_data_files: +# if f.exists(): +# f.unlink() +# +# +# def main(): +# """ +# +# Notes +# ---------- +# +# Usage via command line: BEP032Generator.py [-h] pathToCsv pathToDir +# +# positional arguments: +# pathToCsv Path to your csv file +# +# pathToDir Path to your folder +# +# optional arguments: +# -h, --help show this help message and exit +# """ +# +# parser = argparse.ArgumentParser() +# parser.add_argument('pathToCsv', help='Path to your csv file') +# parser.add_argument('pathToDir', help='Path to your folder') +# +# # Create two argument groups +# +# args = parser.parse_args() +# +# # Check if directory exists +# if not os.path.isdir(args.pathToDir): +# print('Directory does not exist:', args.pathToDir) +# exit(1) +# generate_struct(args.pathToCsv, args.pathToDir) +# +# +# if __name__ == '__main__': +# main() diff --git a/tests/test_BEP032Templater.py b/tests/test_BEP032Templater.py index f5bd0be..d54db6a 100644 --- a/tests/test_BEP032Templater.py +++ b/tests/test_BEP032Templater.py @@ -175,47 +175,47 @@ def tearDown(self): initialize_test_directory(clean=True) -class Test_ReadCsv(unittest.TestCase): - - def setUp(self): - csv_filename = generate_simple_csv_file() - self.csv_file = csv_filename - - def test_read_csv(self): - df = extract_structure_from_csv(self.csv_file) - expected_headers = ['sub_id', 'ses_id'] - self.assertListEqual(expected_headers, list(df)) - - -class Test_GenerateStruct(unittest.TestCase): - - def setUp(self): - initialize_test_directory(clean=True) - csv_filename = generate_simple_csv_file() - self.csv_file = csv_filename - - def test_generate_example_structure(self): - generate_struct(self.csv_file, test_directory) - # extract all paths that exist in the test directory - existing_paths = [p[0] for p in os.walk(test_directory)] - - # find path that is corresponding to each line of the csv file - with open(self.csv_file) as f: - header = f.readline() - # iterate through sessions - for line in f.readlines(): - found_path = False - for existing_path in existing_paths: - if all(key in existing_path for key in line.strip().split(',')): - found_path = True - break - if not found_path: - print(line.strip().split(',')) - - self.assertTrue(found_path) - - def doCleanups(self): - initialize_test_directory(clean=True) +# class Test_ReadCsv(unittest.TestCase): +# +# def setUp(self): +# csv_filename = generate_simple_csv_file() +# self.csv_file = csv_filename +# +# def test_read_csv(self): +# df = extract_structure_from_csv(self.csv_file) +# expected_headers = ['sub_id', 'ses_id'] +# self.assertListEqual(expected_headers, list(df)) +# +# +# class Test_GenerateStruct(unittest.TestCase): +# +# def setUp(self): +# initialize_test_directory(clean=True) +# csv_filename = generate_simple_csv_file() +# self.csv_file = csv_filename +# +# def test_generate_example_structure(self): +# generate_struct(self.csv_file, test_directory) +# # extract all paths that exist in the test directory +# existing_paths = [p[0] for p in os.walk(test_directory)] +# +# # find path that is 
corresponding to each line of the csv file +# with open(self.csv_file) as f: +# header = f.readline() +# # iterate through sessions +# for line in f.readlines(): +# found_path = False +# for existing_path in existing_paths: +# if all(key in existing_path for key in line.strip().split(',')): +# found_path = True +# break +# if not found_path: +# print(line.strip().split(',')) +# +# self.assertTrue(found_path) +# +# def doCleanups(self): +# initialize_test_directory(clean=True) if __name__ == '__main__': From 7020fe7b039f982c44e6a9db98a2204f564af914 Mon Sep 17 00:00:00 2001 From: sprenger Date: Tue, 15 Feb 2022 16:00:13 +0100 Subject: [PATCH 07/13] [temp] extend test files and avoid ambiguous records --- tests/test_files/record.csv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_files/record.csv b/tests/test_files/record.csv index 0259a51..6a77028 100644 --- a/tests/test_files/record.csv +++ b/tests/test_files/record.csv @@ -1,3 +1,4 @@ record_id,redcap_repeat_instrument,redcap_repeat_instance,redcap_survey_identifier,diglabform_timestamp,ethical_protocol_id,user,date,exp_name,guid,ses_number,modality___behaviour_eye,modality___behaviour_hand,modality___single_electrode,modality___multi_electrode,modality___emg,modality___int,modality___vsdi,modality___ecog,modality___seeg,stimulation,weight,comments_exp,comments_setup,comments_subject,data_quality,incomplete,subject_behaviour___very_motivated,subject_behaviour___working,subject_behaviour___thirsty,subject_behaviour___sleepy,subject_behaviour___unmotivated,subject_behaviour___agitated,subject_behaviour_multi,time_last_trial,subject_behaviour_2___agitated,subject_behaviour_2___resting,subject_behaviour_2___sleeping,subject_behaviour_2___working,reward_fluid,reward_fluid_additional,reward_fluid_type___water,reward_fluid_type___apple,reward_fluid_type___mixed,reward_fluid_type___other,reward_fluid_type_other,reward_other___fruit_fresh,reward_other___fruit_dry,reward_other___seeds,reward_other___treats,reward_other___insects,special_event_0,special_event_time_0,special_event_1,special_event_time_1,special_event_2,special_event_time_2,data_filename,exp_duration,task_name,task_mode,stim_set_primavoice,stim_set_morphing,stim_set_formant_saliency,stim_set_identity,stim_type_other,tone_frequency,tone_duration,stim_duration,stim_attenuation,stim_isi,stim_pretrial_delay,time_out_delay,reaction_timeout,min_n,max_n,reward_ratio,reward_duration,reward_volume,trial_count_correct,avg_stim_occurrence_correct,avg_stim_occurrence_all,tot_pres_stimuli_primavoice_correct,tot_pres_stimuli_primavoice_all,start_time_primavoice,tot_pres_stimuli_identity_correct,tot_pres_stimuli_identity_all,start_time_identity,tot_pres_stimuli_morphing_correct,tot_pres_stimuli_morphing_all,start_time_morphing,tot_pres_stimuli_formant_saliency_correct,tot_pres_stimuli_formant_saliency_all,start_time_formant_saliency,tot_pres_stimuli_bpn_correct,tot_pres_stimuli_bpn_all,start_time_bpn,tot_pres_stimuli_custom_stim_correct,tot_pres_stimuli_custom_stim_all,start_time_custom_stim,sua_1,sua_2,sua_3,sua_4,sua_5,sua_6,sua_7,sua_8,sua_9,sua_10,sua_11,sua_12,sua_13,sua_14,sua_15,sua_16,sua_17,sua_18,sua_19,sua_20,sua_21,sua_22,sua_23,sua_24,sua_25,sua_26,sua_27,sua_28,sua_29,sua_30,sua_31,sua_32,sua_33,sua_34,sua_35,sua_36,sua_37,sua_38,sua_39,sua_40,sua_41,sua_42,sua_43,sua_44,sua_45,sua_46,sua_47,sua_48,sua_49,sua_50,sua_51,sua_52,sua_53,sua_54,sua_55,sua_56,sua_57,sua_58,sua_59,sua_60,sua_61,sua_62,sua_63,sua_64,sua_65,sua_66,sua_67,sua_68,sua_69,sua_70,sua_71,
sua_72,sua_73,sua_74,sua_75,sua_76,sua_77,sua_78,sua_79,sua_80,sua_81,sua_82,sua_83,sua_84,sua_85,sua_86,sua_87,sua_88,sua_89,sua_90,sua_91,sua_92,sua_93,sua_94,sua_95,sua_96,sua_isolated_1___isolated,sua_isolated_2___isolated,sua_isolated_3___isolated,sua_isolated_4___isolated,sua_isolated_5___isolated,sua_isolated_6___isolated,sua_isolated_7___isolated,sua_isolated_8___isolated,sua_isolated_9___isolated,sua_isolated_10___isolated,sua_isolated_11___isolated,sua_isolated_12___isolated,sua_isolated_13___isolated,sua_isolated_14___isolated,sua_isolated_15___isolated,sua_isolated_16___isolated,sua_isolated_17___isolated,sua_isolated_18___isolated,sua_isolated_19___isolated,sua_isolated_20___isolated,sua_isolated_21___isolated,sua_isolated_22___isolated,sua_isolated_23___isolated,sua_isolated_24___isolated,sua_isolated_25___isolated,sua_isolated_26___isolated,sua_isolated_27___isolated,sua_isolated_28___isolated,sua_isolated_29___isolated,sua_isolated_30___isolated,sua_isolated_31___isolated,sua_isolated_32___isolated,sua_isolated_33___isolated,sua_isolated_34___isolated,sua_isolated_35___isolated,sua_isolated_36___isolated,sua_isolated_37___isolated,sua_isolated_38___isolated,sua_isolated_39___isolated,sua_isolated_40___isolated,sua_isolated_41___isolated,sua_isolated_42___isolated,sua_isolated_43___isolated,sua_isolated_44___isolated,sua_isolated_45___isolated,sua_isolated_46___isolated,sua_isolated_47___isolated,sua_isolated_48___isolated,sua_isolated_49___isolated,sua_isolated_50___isolated,sua_isolated_51___isolated,sua_isolated_52___isolated,sua_isolated_53___isolated,sua_isolated_54___isolated,sua_isolated_55___isolated,sua_isolated_56___isolated,sua_isolated_57___isolated,sua_isolated_58___isolated,sua_isolated_59___isolated,sua_isolated_60___isolated,sua_isolated_61___isolated,sua_isolated_62___isolated,sua_isolated_63___isolated,sua_isolated_64___isolated,sua_isolated_65___isolated,sua_isolated_66___isolated,sua_isolated_67___isolated,sua_isolated_68___isolated,sua_isolated_69___isolated,sua_isolated_70___isolated,sua_isolated_71___isolated,sua_isolated_72___isolated,sua_isolated_73___isolated,sua_isolated_74___isolated,sua_isolated_75___isolated,sua_isolated_76___isolated,sua_isolated_77___isolated,sua_isolated_78___isolated,sua_isolated_79___isolated,sua_isolated_80___isolated,sua_isolated_81___isolated,sua_isolated_82___isolated,sua_isolated_83___isolated,sua_isolated_84___isolated,sua_isolated_85___isolated,sua_isolated_86___isolated,sua_isolated_87___isolated,sua_isolated_88___isolated,sua_isolated_89___isolated,sua_isolated_90___isolated,sua_isolated_91___isolated,sua_isolated_92___isolated,sua_isolated_93___isolated,sua_isolated_94___isolated,sua_isolated_95___isolated,sua_isolated_96___isolated,diglabform_complete 1,diglabform,1,,2022-01-14 17:55:00,0000_2000000000000000_v1,sprenger.j,2021-12-07,myexperiment,AAA11111111,1,0,1,0,1,0,0,0,0,0,,5.7,,,,good,,1,0,0,0,0,0,,,0,0,0,0,115,0,0,0,1,0,,1,1,0,1,0,,,,,,,data-211207-142257,46,mytask1,task_active,training96,,,,,1000,500,500,0,250,1500,4000,250,3,7,2,340,0.48,306,,,1532,1773,,,,,,,,,,,,,,,,,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 -2,diglabform,1,,2022-01-14 
18:38:50,0000_2000000000000000_v2,sprenger.j,2021-12-08,myexperiment,AAA11111111,1,0,1,0,1,0,0,0,0,0,,6.7,,,,good,,1,0,0,0,0,0,,,0,0,0,0,130,0,0,0,1,0,,1,1,0,1,0,,,,,,,data-211208-142020,44,mytask2,task_active,testing,,,,,1000,500,500,0,250,1500,4000,250,3,7,2,340,0.48,335,,,1692,1866,,,,,,,,,,,,,,,,,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 +2,diglabform,1,,2022-01-14 18:38:50,0000_2000000000000000_v2,sprenger.j,2021-12-07,myexperiment,AAA11111111,2,0,1,0,1,0,0,0,0,0,,6.7,,,,good,,1,0,0,0,0,0,,,0,0,0,0,130,0,0,0,1,0,,1,1,0,1,0,,,,,,,data-211208-142020,44,mytask2,task_active,testing,,,,,1000,500,500,0,250,1500,4000,250,3,7,2,340,0.48,335,,,1692,1866,,,,,,,,,,,,,,,,,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 +3,diglabform,1,,2022-01-14 18:38:50,0000_2000000000000000_v2,sprenger.j,2021-12-08,myexperiment,AAA11111111,1,0,1,0,1,0,0,0,0,0,,6.7,,,,good,,1,0,0,0,0,0,,,0,0,0,0,130,0,0,0,1,0,,1,1,0,1,0,,,,,,,data-211208-142020,44,mytask2,task_active,testing,,,,,1000,500,500,0,250,1500,4000,250,3,7,2,340,0.48,335,,,1692,1866,,,,,,,,,,,,,,,,,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 From 7919600eec9aea569af8d81066881bacd6df4c01 Mon Sep 17 00:00:00 2001 From: Julia Sprenger Date: Wed, 9 Nov 2022 10:58:20 +0100 Subject: [PATCH 08/13] Update templater to match latest name changes in BEP032Tools --- BEP032Templater.py | 4 ++-- tests/test_BEP032Templater.py | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/BEP032Templater.py b/BEP032Templater.py index 32192a4..3258445 100644 --- a/BEP032Templater.py +++ b/BEP032Templater.py @@ -279,8 +279,8 @@ def validate(self): # for session_kwargs in df.to_dict('index').values(): # session = BEP032TemplateData(**session_kwargs) # session.basedir = pathToDir -# session.generate_structure() -# session.register_data_files(*test_data_files) +# session.generate_directory_structure() +# session.register_data_sources(*test_data_files) # session.organize_data_files(mode='copy') # session.generate_all_metadata_files() # diff --git a/tests/test_BEP032Templater.py b/tests/test_BEP032Templater.py index d54db6a..e9b7b5b 100644 --- a/tests/test_BEP032Templater.py +++ b/tests/test_BEP032Templater.py @@ -45,8 +45,8 @@ def test_generate_all_metadata(self): for i, data in enumerate(self.bep032_data_list): ses_number = self.diglab_dfs.loc[[i]]['ses_number'].values[0] exp_name = self.diglab_dfs.loc[[i]]['exp_name'].values[0] - data.generate_structure() - data.register_data_files(*self.test_data_files[i], run=ses_number, 
task=exp_name) + data.generate_directory_structure() + data.register_data_sources(*self.test_data_files[i], run=ses_number, task=exp_name) data.organize_data_files() data.generate_all_metadata_files() @@ -91,14 +91,14 @@ def setUp(self): # self.assertTrue(df_local) # self.assertTrue(str(df_abs).endswith(str(df_local))) # - # def test_generate_structure(self): - # self.bep032_data.generate_structure() + # def test_generate_directory_structure(self): + # self.bep032_data.generate_directory_structure() # df = self.bep032_data.get_data_folder() # self.assertTrue(df.exists()) # # def test_data_files(self): - # self.bep032_data.generate_structure() - # self.bep032_data.register_data_files(*self.test_data_files) + # self.bep032_data.generate_directory_structure() + # self.bep032_data.register_data_sources(*self.test_data_files) # self.bep032_data.organize_data_files() # # session_folder = self.bep032_data.get_data_folder() @@ -110,13 +110,13 @@ def setUp(self): # self.assertTrue(data_file.name.find("_ephys")) # # def test_data_files_complex(self): - # self.bep032_data.generate_structure() + # self.bep032_data.generate_directory_structure() # nix_files = [self.test_data_files[0]] * 3 # runs = ['run1', 'run2'] # tasks = ['task1', 'task2'] # for run in runs: # for task in tasks: - # self.bep032_data.register_data_files(*nix_files, + # self.bep032_data.register_data_sources(*nix_files, # run=run, task=task) # # self.bep032_data.organize_data_files() @@ -145,14 +145,14 @@ def setUp(self): # self.assertEqual(len(files), exp) # # def test_data_files_same_key(self): - # self.bep032_data.generate_structure() + # self.bep032_data.generate_directory_structure() # nix_files = [self.test_data_files[0]] # run = 'run1' # task = 'task1' # - # self.bep032_data.register_data_files(*nix_files, run=run, task=task) + # self.bep032_data.register_data_sources(*nix_files, run=run, task=task) # # register more data files in a second step - # self.bep032_data.register_data_files(*nix_files, run=run, task=task) + # self.bep032_data.register_data_sources(*nix_files, run=run, task=task) # # self.bep032_data.organize_data_files() # @@ -166,8 +166,8 @@ def setUp(self): # # def test_implemented_error_raised(self): # path = "" - # self.test_generate_structure() - # self.bep032_data.register_data_files(*self.test_data_files) + # self.test_generate_directory_structure() + # self.bep032_data.register_data_sources(*self.test_data_files) # self.bep032_data.organize_data_files() # self.bep032_data.generate_all_metadata_files() From 09614f9778c3d02f0971f27a1f3b097cfc6b9cba Mon Sep 17 00:00:00 2001 From: Julia Sprenger Date: Wed, 9 Nov 2022 11:04:02 +0100 Subject: [PATCH 09/13] update package description and test requirements --- setup.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 5c6db21..339b7d7 100644 --- a/setup.py +++ b/setup.py @@ -16,15 +16,15 @@ packages=find_packages(), author="Julia Sprenger, Jeremy Garcia", - description="diglab2ando is a tool that allows automatically creating a directory where data and metadata from a " - "neuroscientific experiment are stored, and that follows the AnDO (Animal Data Organization) " - "specifications ( https://int-nit.github.io/AnDO/ ), using as input a filled pdf form or a redcap " - "survey/form generated using the DigLaB tool used at INT", + description="diglab2bids is a tool that allows automatically creating a directory where data and metadata from a " + "neuroscientific experiment are stored, and that follows the DigLab " + 
"specifications (https://github.com/INT-NIT/DigLabTools/), using as input a filled pdf form or a redcap " + "survey/form", license='MIT', install_requires=[], include_package_data=True, python_requires='>=3.6', extras_require={ - 'test': ['pytest'] + 'test': ['pytest', 'pandas'] } ) From 87f8dc4c64e5c3d9c846b2998e6510f84c42d5f6 Mon Sep 17 00:00:00 2001 From: Julia Sprenger Date: Wed, 9 Nov 2022 11:12:45 +0100 Subject: [PATCH 10/13] remove outdated updates --- BEP032Templater.py | 326 ---------------------------------- tests/test_BEP032Templater.py | 222 ----------------------- 2 files changed, 548 deletions(-) delete mode 100644 BEP032Templater.py delete mode 100644 tests/test_BEP032Templater.py diff --git a/BEP032Templater.py b/BEP032Templater.py deleted file mode 100644 index 3258445..0000000 --- a/BEP032Templater.py +++ /dev/null @@ -1,326 +0,0 @@ -import shutil -import argparse -import re - -import bep032tools.validator.BEP032Validator - -try: - import pandas as pd - - HAVE_PANDAS = True -except ImportError: - HAVE_PANDAS = False -from bep032tools.validator.BEP032Validator import build_rule_regexp -from bep032tools.rulesStructured import RULES_SET -from bep032tools.generator.utils import * -from bep032tools.generator.BEP032Generator import BEP032Data - -METADATA_LEVELS = {i: r['authorized_metadata_files'] for i, r in enumerate(RULES_SET)} -METADATA_LEVEL_BY_NAME = {build_rule_regexp(v)[0]: k for k, values in METADATA_LEVELS.items() for v - in values} - -# TODO: These can be extracted from the BEP032Data init definition. Check out the -# function inspection options -ESSENTIAL_CSV_COLUMNS = ['sub_id', 'ses_id'] -OPTIONAL_CSV_COLUMNS = ['tasks', 'runs'] - - -class BEP032TemplateData(BEP032Data): - """ - Representation of a BEP032 Data, as specified by in the - [ephys BEP](https://bids.neuroimaging.io/bep032) - - The BEP032Data object can track multiple realizations of `split`, `run`, `task` but only a - single realization of `session` and `subject`, i.e. to represent multiple `session` folders, - multiple BEP032Data objects are required. To include multiple realizations of tasks - or runs, call the `register_data` method for each set of parameters separately. - - Parameters - ---------- - sub_id : str - subject identifier, e.g. '0012' or 'j.s.smith' - ses-id : str - session identifier, e.g. 
'20210101' or '007' - tasks : str - task identifier of data files - runs : str - run identifier of data files - """ - - def __init__(self, sub_id, ses_id, diglab_df=None, project_name=None): - super().__init__(sub_id, ses_id, modality='ephys') - self.diglab_df = diglab_df - self.project_name = project_name - - def generate_metadata_file_participants(self, output): - assert self.sub_id == self.diglab_df['guid'].values[0] - participant_df = pd.DataFrame([['sub-' + self.sub_id]], columns=['participant_id']) - participant_df.set_index('participant_id', inplace=True) - if not output.with_suffix('.tsv').exists(): - save_tsv(participant_df, output) - - def generate_metadata_file_tasks(self, output): - # here we want to call save_json and save_tsv() - pass - - def generate_metadata_file_dataset_description(self, output): - dataset_dict = { - "Name": self.project_name, - "BIDSVersion": "1.6.0", - "License": "CC BY 4.0", - "Authors": self.diglab_df['user'].to_list(), - "Acknowledgements": "TBA", - "HowToAcknowledge": "TBA", - "Funding": ["TBA"], - "ReferencesAndLinks": "TBA", - "EthicsApprovals": self.diglab_df['ethical_protocol_id'].to_list() - } - save_json(dataset_dict, output) - - def generate_metadata_file_sessions(self, output): - session_df = pd.DataFrame([ - ['ses-' + self.ses_id, '2009-06-15T13:45:30', '120']], - columns=['session_id', 'acq_time', 'systolic_blood_pressure']) - session_df.set_index('session_id', inplace=True) - if not output.with_suffix('.tsv').exists(): - save_tsv(session_df, output) - - def generate_metadata_file_probes(self, output): - probes_df = pd.DataFrame([ - ['e380a', 'multi-shank', 0, 'iridium-oxide', 0, 0, 0, 'circle', 20], - ['e380b', 'multi-shank', 1.5, 'iridium-oxide', 0, 100, 0, 'circle', 20], - ['t420a', 'tetrode', 3.6, 'iridium-oxide', 0, 200, 0, 'circle', 20], - ['t420b', 'tetrode', 7, 'iridium-oxide', 500, 0, 0, 'circle', 20]], - columns=['probe_id', 'type', 'coordinate_space', 'material', 'x', 'y', 'z', 'shape', - 'contact_size']) - probes_df.set_index('probe_id', inplace=True) - save_tsv(probes_df, output) - - def generate_metadata_file_channels(self, output): - channels_df = pd.DataFrame([ - # [129, 1, 'neuronal', 'mV', 30000, 30, 'good'], - # [130, 3, 'neuronal', 'mV', 30000, 30, 'good'], - # [131, 5, 'neuronal', 'mV', 30000, 30, 'bad'], - # [132, 'n/a', 'sync_pulse', 'V', 1000, 1, 'n/a'] - ], - columns=['channel_id', 'contact_id', 'type', 'units', 'sampling_frequency', 'gain', - 'status']) - channels_df.set_index(['channel_id', 'contact_id'], inplace=True) - save_tsv(channels_df, output) - - def generate_metadata_file_contacts(self, output): - contact_df = pd.DataFrame([ - # [1, 'e380a', 0, 1.1, 'iridium-oxide', 0, 0, 0, 'circle', 20], - # [2, 'e380a', 0, 1.5, 'iridium-oxide', 0, 100, 0, 'circle', 20], - # [3, 'e380a', 0, 3.6, 'iridium-oxide', 0, 200, 0, 'circle', 20], - # [4, 'e380a', 1, 7, 'iridium-oxide', 500, 0, 0, 'circle', 20], - # [5, 'e380a', 1, 7, 'iridium-oxide', 500, 100, 0, 'circle', 20], - # [6, 'e380a', 1, 7, 'iridium-oxide', 500, 200, 0, 'circle', 20] - ], - columns=['contact_id', 'probe_id', 'shank_id', 'impedance', 'material', 'x', 'y', 'z', - 'shape', 'contact_size']) - contact_df.set_index(['contact_id', 'probe_id'], inplace=True) - save_tsv(contact_df, output) - - def _get_compressed_choices(self, question_label, active_value=1): - # extract columns belonging to this question (if it's a multiple choice question) - question_df = self.diglab_df.filter(regex=f'{question_label}___\w', axis=1) - # shorten column names and only use 
choices as column labels - question_df.columns = question_df.columns.str.replace(f'{question_label}___', '') - # extract choices that contain 'active' (selected) value - choices = question_df.columns[(question_df.values==active_value)[0]].to_list() - return choices - - def generate_metadata_file_ephys(self, output): - # extract selected modalities - modalities = self._get_compressed_choices('modality') - trialbeh = self._get_compressed_choices('subject_behaviour') - posttrialbeh = self._get_compressed_choices('subject_behaviour_2') - rewardfluidtype = self._get_compressed_choices('reward_fluid_type') - - if self.diglab_df['reward_fluid_type_other'].values[0]: - rewardfluidtype += self.diglab_df['reward_fluid_type_other'].values - - rewardothertype = self._get_compressed_choices('reward_other') - - - ephys_dict = { - # "PowerLineFrequency": 50, - # "PowerLineFrequencyUnit": "Hz", - # "Manufacturer": "OpenEphys", - # "ManufacturerModelName": "OpenEphys Starter Kit", - # "ManufacturerModelVersion": "", - # "SamplingFrequency": 30000, - # "SamplingFrequencyUnit": "Hz", - # "Location": "Institut de Neurosciences de la Timone, Faculté de Médecine, 27, " - # "Boulevard Jean Moulin, 13005 Marseille - France", - # "Software": "Cerebus", - # "SoftwareVersion": "1.5.1", - "Creator": self.diglab_df['user'].values[0], - # "Maintainer": "John Doe jr.", - # "Procedure": { - # "Pharmaceuticals": { - # "isoflurane": { - # "PharmaceuticalName": "isoflurane", - # "PharmaceuticalDoseAmount": 50, - # "PharmaceuticalDoseUnit": "ug/kg/min", - # }, - # "ketamine": { - # "PharmaceuticalName": "ketamine", - # "PharmaceuticalDoseAmount": 0.1, - # "PharmaceuticalDoseUnit": "ug/kg/min", - # }, - # }, - # }, - "Comments": self.diglab_df['comments_exp'].values[0], - "SessionNumber": self.diglab_df['ses_number'].values[0], - "Subject": { - "Weight": self.diglab_df['weight'].values[0], - "WeightUnit": 'kg', - "Comments": self.diglab_df['comments_subject'].values[0], - "SubjectBehaviour": trialbeh, - "PostTrialSubjectBehaviour": posttrialbeh, - }, - "SpecialEvents": {}, - "Modalities": modalities, - "Setup": { - "Comments": self.diglab_df['comments_setup'].values[0] - }, - "Rewards": { - "FluidType": rewardfluidtype, - "OtherType": rewardothertype, - }, - "DigLab": { - "record_id": self.diglab_df['record_id'].values[0], - # TODO: Fix test dataset - # "diglab_version": self.diglab_df['provenance_diglabtools_version'].values[0], - # "redcap_form_version": self.diglab_df['redcap_form_version'].values[0], - } - } - - for id in range(3): - if self.diglab_df[f'special_event_{id}'].values[0]: - ephys_dict["SpecialEvents"][id] = { - "Comment": self.diglab_df[f'special_event_{id}'].values[0], - "Time": self.diglab_df[f'special_event_time_{id}'].values[0] - } - - save_json(ephys_dict, output) - - def generate_metadata_file_scans(self, output): - # extract data quality value - quality = self._get_compressed_choices('data_quality') - - for key in self.data.keys(): - filename = f'ephys/sub-{self.sub_id}_ses-{self.ses_id}' - if key: - filename += key - filename += '.nix' - runs_df = pd.DataFrame([[ - filename, - self.diglab_df['date'].values[0], - self.diglab_df['exp_name'].values[0], - self.diglab_df['stimulation'].values[0], - self.diglab_df['subject_behaviour_multi'].values[0], - self.diglab_df['time_last_trial'].values[0], - quality, - self.diglab_df['incomplete'].values[0], - self.diglab_df['reward_fluid'].values[0], - 'ml', - self.diglab_df['reward_fluid_additional'].values[0] - ]], - columns=['filename', 'acq_date', 'exp_name', 
'stimulation', 'post_trial_data', - 'time_last_trial', 'data_quality', 'incomplete_session', 'fluid_reward', - 'fluid_reward_unit', 'additional_fluid_reward']) - runs_df.set_index('filename', inplace=True) - save_tsv(runs_df, output) - - def validate(self): - """ - Validate the generated structure using the BEP032 validator - - Parameters - ---------- - output_folder: str - path to the folder to validate - - Returns - ---------- - bool - True if validation was successful. False if it failed. - """ - bep032tools.validator.BEP032Validator.is_valid(self.basedir) - - -# -# def generate_struct(csv_file, pathToDir): -# """ -# Create structure with csv file given in argument -# This file must contain a header row specifying the provided data. Accepted titles are -# defined in the BEP. -# Essential information of the following attributes needs to be present. -# Essential columns are 'sub_id' and 'ses_id'. -# -# Parameters -# ---------- -# csv_file: str -# Csv file that contains a list of directories to create. -# pathToDir: str -# Path to directory where the directories will be created. -# """ -# -# df = extract_structure_from_csv(csv_file) -# -# df = df[ESSENTIAL_CSV_COLUMNS] -# test_data_files = [Path('empty_ephys.nix')] -# for f in test_data_files: -# f.touch() -# -# for session_kwargs in df.to_dict('index').values(): -# session = BEP032TemplateData(**session_kwargs) -# session.basedir = pathToDir -# session.generate_directory_structure() -# session.register_data_sources(*test_data_files) -# session.organize_data_files(mode='copy') -# session.generate_all_metadata_files() -# -# # cleanup -# for f in test_data_files: -# if f.exists(): -# f.unlink() -# -# -# def main(): -# """ -# -# Notes -# ---------- -# -# Usage via command line: BEP032Generator.py [-h] pathToCsv pathToDir -# -# positional arguments: -# pathToCsv Path to your csv file -# -# pathToDir Path to your folder -# -# optional arguments: -# -h, --help show this help message and exit -# """ -# -# parser = argparse.ArgumentParser() -# parser.add_argument('pathToCsv', help='Path to your csv file') -# parser.add_argument('pathToDir', help='Path to your folder') -# -# # Create two argument groups -# -# args = parser.parse_args() -# -# # Check if directory exists -# if not os.path.isdir(args.pathToDir): -# print('Directory does not exist:', args.pathToDir) -# exit(1) -# generate_struct(args.pathToCsv, args.pathToDir) -# -# -# if __name__ == '__main__': -# main() diff --git a/tests/test_BEP032Templater.py b/tests/test_BEP032Templater.py deleted file mode 100644 index e9b7b5b..0000000 --- a/tests/test_BEP032Templater.py +++ /dev/null @@ -1,222 +0,0 @@ -import os -import unittest -from pathlib import Path -import pandas as pd -from utils import initialize_test_directory, test_directory -from BEP032Templater import BEP032TemplateData - - -class Test_BEP032Templater(unittest.TestCase): - def setUp(self): - test_dir = Path(initialize_test_directory(clean=True)) - self.tasks = None - self.runs = None - - sources = test_dir / 'sources' - sources.mkdir() - project = test_dir / 'project-A' - project.mkdir() - self.basedir = project - self.diglab_dfs = pd.read_csv('test_files/record.csv', header=0, na_filter=False, dtype=str) - record_ids = self.diglab_dfs.index.values - - self.bep032_data_list = [] - self.test_data_files = [] - self.test_mdata_files = [] - - for record_id in record_ids: - record = self.diglab_dfs.loc[[record_id]] - sub_id = record['guid'].values[0] - ses_id = record['date'].values[0] - d = BEP032TemplateData(sub_id, ses_id, 
diglab_df=record) - d.basedir = project - - self.bep032_data_list.append(d) - prefix = f'sub-{sub_id}_ses-{ses_id}' - self.test_data_files.append([sources / (prefix + '_ephy.nix')]) - self.test_mdata_files.append([sources / 'dataset_description.json', - sources / (prefix + '_probes.tsv'), - sources / (prefix + '_contacts.json')]) - - for f in self.test_mdata_files[-1] + self.test_data_files[-1]: - f.touch() - - def test_generate_all_metadata(self): - for i, data in enumerate(self.bep032_data_list): - ses_number = self.diglab_dfs.loc[[i]]['ses_number'].values[0] - exp_name = self.diglab_dfs.loc[[i]]['exp_name'].values[0] - data.generate_directory_structure() - data.register_data_sources(*self.test_data_files[i], run=ses_number, task=exp_name) - data.organize_data_files() - - data.generate_all_metadata_files() - - -class Test_BEP032TemplateData(unittest.TestCase): - - def setUp(self): - test_dir = Path(initialize_test_directory(clean=True)) - self.sub_id = 'sub5' - self.ses_id = 'ses1' - self.tasks = None - self.runs = None - - sources = test_dir / 'sources' - sources.mkdir() - project = test_dir / 'project-A' - project.mkdir() - self.basedir = project - - d = BEP032TemplateData(self.sub_id, self.ses_id) - d.basedir = project - - self.bep032_data = d - prefix = f'sub-{self.sub_id}_ses-{self.ses_id}' - self.test_data_files = [sources / (prefix + '_ephy.nix'), - sources / (prefix + '_ephy.nwb')] - self.test_mdata_files = [sources / 'dataset_description.json', - sources / (prefix + '_probes.tsv'), - sources / (prefix + '_contacts.json')] - - for f in self.test_mdata_files + self.test_data_files: - f.touch() - - # def test_get_data_folder(self): - # df = self.bep032_data.get_data_folder() - # self.assertTrue(df) - # - # df_abs = self.bep032_data.get_data_folder('absolute') - # df_local = self.bep032_data.get_data_folder('local') - # - # self.assertTrue(df_local) - # self.assertTrue(str(df_abs).endswith(str(df_local))) - # - # def test_generate_directory_structure(self): - # self.bep032_data.generate_directory_structure() - # df = self.bep032_data.get_data_folder() - # self.assertTrue(df.exists()) - # - # def test_data_files(self): - # self.bep032_data.generate_directory_structure() - # self.bep032_data.register_data_sources(*self.test_data_files) - # self.bep032_data.organize_data_files() - # - # session_folder = self.bep032_data.get_data_folder() - # self.assertTrue(session_folder.exists()) - # data_files = list(session_folder.glob('*.nix')) - # data_files += list(session_folder.glob('*.nwb')) - # self.assertEqual(len(self.test_data_files), len(data_files)) - # for data_file in data_files: - # self.assertTrue(data_file.name.find("_ephys")) - # - # def test_data_files_complex(self): - # self.bep032_data.generate_directory_structure() - # nix_files = [self.test_data_files[0]] * 3 - # runs = ['run1', 'run2'] - # tasks = ['task1', 'task2'] - # for run in runs: - # for task in tasks: - # self.bep032_data.register_data_sources(*nix_files, - # run=run, task=task) - # - # self.bep032_data.organize_data_files() - # - # session_folder = self.bep032_data.get_data_folder() - # self.assertTrue(session_folder.exists()) - # data_files = list(session_folder.glob('*.nix')) - # self.assertEqual(len(data_files), len(runs) * len(tasks) * len(nix_files)) - # - # for data_file in data_files: - # self.assertTrue(data_file.name.find("_ephys")) - # - # for run in runs: - # exp = len(tasks) * len(nix_files) - # files = list(session_folder.glob(f'*_run-{run}*.nix')) - # self.assertEqual(len(files), exp) - # - # for task 
in tasks: - # exp = len(runs) * len(nix_files) - # files = list(session_folder.glob(f'*_task-{task}*.nix')) - # self.assertEqual(len(files), exp) - # - # for split in range(len(nix_files)): - # exp = len(runs) * len(tasks) - # files = list(session_folder.glob(f'*_split-{split}*.nix')) - # self.assertEqual(len(files), exp) - # - # def test_data_files_same_key(self): - # self.bep032_data.generate_directory_structure() - # nix_files = [self.test_data_files[0]] - # run = 'run1' - # task = 'task1' - # - # self.bep032_data.register_data_sources(*nix_files, run=run, task=task) - # # register more data files in a second step - # self.bep032_data.register_data_sources(*nix_files, run=run, task=task) - # - # self.bep032_data.organize_data_files() - # - # session_folder = self.bep032_data.get_data_folder() - # self.assertTrue(session_folder.exists()) - # data_files = list(session_folder.glob('*.nix')) - # self.assertEqual(len(data_files), 2) - # - # for data_file in data_files: - # self.assertTrue(data_file.name.find(f"_task-{task}_run-{run}_split-")) - # - # def test_implemented_error_raised(self): - # path = "" - # self.test_generate_directory_structure() - # self.bep032_data.register_data_sources(*self.test_data_files) - # self.bep032_data.organize_data_files() - # self.bep032_data.generate_all_metadata_files() - - def tearDown(self): - initialize_test_directory(clean=True) - - -# class Test_ReadCsv(unittest.TestCase): -# -# def setUp(self): -# csv_filename = generate_simple_csv_file() -# self.csv_file = csv_filename -# -# def test_read_csv(self): -# df = extract_structure_from_csv(self.csv_file) -# expected_headers = ['sub_id', 'ses_id'] -# self.assertListEqual(expected_headers, list(df)) -# -# -# class Test_GenerateStruct(unittest.TestCase): -# -# def setUp(self): -# initialize_test_directory(clean=True) -# csv_filename = generate_simple_csv_file() -# self.csv_file = csv_filename -# -# def test_generate_example_structure(self): -# generate_struct(self.csv_file, test_directory) -# # extract all paths that exist in the test directory -# existing_paths = [p[0] for p in os.walk(test_directory)] -# -# # find path that is corresponding to each line of the csv file -# with open(self.csv_file) as f: -# header = f.readline() -# # iterate through sessions -# for line in f.readlines(): -# found_path = False -# for existing_path in existing_paths: -# if all(key in existing_path for key in line.strip().split(',')): -# found_path = True -# break -# if not found_path: -# print(line.strip().split(',')) -# -# self.assertTrue(found_path) -# -# def doCleanups(self): -# initialize_test_directory(clean=True) - - -if __name__ == '__main__': - unittest.main() From dd02b1bcab99443e688acc99a3f0e556ef684e37 Mon Sep 17 00:00:00 2001 From: Julia Sprenger Date: Wed, 9 Nov 2022 11:18:37 +0100 Subject: [PATCH 11/13] remove outdated requirement --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 339b7d7..1a96503 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,6 @@ include_package_data=True, python_requires='>=3.6', extras_require={ - 'test': ['pytest', 'pandas'] + 'test': ['pytest'] } ) From 7453b596ba4f9823c9a16cbb1fce3e0bc1add738 Mon Sep 17 00:00:00 2001 From: Julia Sprenger Date: Wed, 9 Nov 2022 12:04:50 +0100 Subject: [PATCH 12/13] update to latest BEP032tools version --- diglab2bids.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/diglab2bids.py b/diglab2bids.py index 7b80771..ef4ea81 100644 --- a/diglab2bids.py +++ b/diglab2bids.py 
@@ -41,7 +41,7 @@ def get_metadata(conf, format):
     return records
 
 
-def convert_to_bids(records, OUTPUT_FOLDER):
+def convert_to_bids(records, files_per_record, OUTPUT_FOLDER):
     """
 
     Parameters
@@ -53,18 +53,14 @@ def convert_to_bids(records, OUTPUT_FOLDER):
     Returns
     ----------
     """
-    for record_dict in records:
+    for files, record_dict in zip(files_per_record, records):
         sub_id, ses_id = get_sub_ses_ids(record_dict)
         gen = BEP032Data(sub_id, ses_id, modality='ephys')
-        files = gen.generate_data_files()
         gen.register_data_files(files)
         gen.basedir = OUTPUT_FOLDER
-        gen.generate_structure()
-        files = gen.generate_metadata_files()
-        gen.register_metadata_files(files)
-
-
-        # generate_metadata_files(record_dict, gen.get_data_folder())
+        gen.generate_directory_structure()
+        gen.generate_all_metadata_files(record_dict)
+        gen.organize_data_files()
 
 
 def get_sub_ses_ids(record_dict):

From f58012ffdc6fd618127ccbab003bd890ac8be1a1 Mon Sep 17 00:00:00 2001
From: Julia Sprenger
Date: Wed, 7 Dec 2022 15:46:42 +0100
Subject: [PATCH 13/13] Renaming ando -> bids

---
 tests/{test_diglab2ando.py => test_diglab2bids.py} | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
 rename tests/{test_diglab2ando.py => test_diglab2bids.py} (61%)

diff --git a/tests/test_diglab2ando.py b/tests/test_diglab2bids.py
similarity index 61%
rename from tests/test_diglab2ando.py
rename to tests/test_diglab2bids.py
index b9c9a11..be01493 100644
--- a/tests/test_diglab2ando.py
+++ b/tests/test_diglab2bids.py
@@ -1,8 +1,10 @@
 from unittest import TestCase
-
+from diglab2bids import convert_to_bids  # imported here to verify that the package dependencies resolve
 class Test(TestCase):
-    def test_empty(self):
+    def test_convert_to_bids(self):
+
+        self.assertEqual(True, True)  # placeholder assertion; the import above is the actual check
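
A note on the final patch: test_convert_to_bids only verifies that diglab2bids imports cleanly, and its assertion is a tautology. A fuller end-to-end smoke test might look like the following sketch. It assumes that get_sub_ses_ids reads the 'guid' and 'date' fields of a record (mirroring how the templater tests earlier in the series derive sub_id and ses_id), that one list of data files is passed per record, and that the generated tree contains sub-* folders under the output directory; none of these details are confirmed by the patches themselves.

    import tempfile
    import unittest
    from pathlib import Path

    from diglab2bids import convert_to_bids


    class TestConvertToBidsEndToEnd(unittest.TestCase):

        def test_creates_subject_folder(self):
            # Hypothetical record: the 'guid' and 'date' keys mirror how the
            # templater tests derive sub_id and ses_id; they are assumptions,
            # not a documented contract of get_sub_ses_ids.
            record = {'guid': 'AAA11111111', 'date': '2021-12-07'}

            with tempfile.TemporaryDirectory() as tmp:
                output = Path(tmp) / 'bids'
                output.mkdir()

                # one placeholder data file registered for this record
                data_file = Path(tmp) / 'empty_ephys.nix'
                data_file.touch()

                convert_to_bids([record], [[data_file]], output)

                # expect at least one subject folder in the generated tree
                self.assertTrue(list(output.glob('sub-*')))


    if __name__ == '__main__':
        unittest.main()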