diff --git a/alphaquant/config/quant_reader_config.yaml b/alphaquant/config/quant_reader_config.yaml index 0b253cda..ffb79612 100644 --- a/alphaquant/config/quant_reader_config.yaml +++ b/alphaquant/config/quant_reader_config.yaml @@ -863,6 +863,9 @@ spectronaut_fragion_ms1_gene: value: "True" use_iontree: True ml_level: CHARGE + variance_predictor_cols: + - EG.Cscore + - FG.ShapeQualityScore spectronaut_fragion_ms1_gene: @@ -912,6 +915,9 @@ spectronaut_fragion_ms1_gene: value: 5.0 use_iontree: True ml_level: CHARGE + variance_predictor_cols: + - EG.Cscore + - FG.ShapeQualityScore spectronaut_precursor_fragion_ms1: format: longtable @@ -971,6 +977,9 @@ spectronaut_precursor_fragion_ms1: value: "True" use_iontree: True ml_level: CHARGE + variance_predictor_cols: + - EG.Cscore + - FG.ShapeQualityScore spectronaut_precursor_fragion_ms1: @@ -1027,6 +1036,9 @@ spectronaut_precursor_fragion_ms1: value: 5.0 use_iontree: True ml_level: CHARGE + variance_predictor_cols: + - EG.Cscore + - FG.ShapeQualityScore spectronaut_precursor_fragion_ms1_protein: @@ -1083,6 +1095,9 @@ spectronaut_precursor_fragion_ms1_protein: value: 5.0 use_iontree: True ml_level: CHARGE + variance_predictor_cols: + - EG.Cscore + - FG.ShapeQualityScore spectronaut_fragion_ms1_protein: @@ -1132,6 +1147,9 @@ spectronaut_fragion_ms1_protein: value: 5.0 use_iontree: True ml_level: CHARGE + variance_predictor_cols: + - EG.Cscore + - FG.ShapeQualityScore annotation_columns: - PG.Genes @@ -1170,6 +1188,9 @@ spectronaut_fragion_protein: value: 5.0 use_iontree: True ml_level: CHARGE + variance_predictor_cols: + - EG.Cscore + - FG.ShapeQualityScore annotation_columns: - PG.Genes @@ -1342,8 +1363,25 @@ diaumpire_precursor_ms1: ion_cols: - Peptide Key +fragpipe_precursor: + format: widetable + quant_pre_or_suffix: " Intensity" + protein_cols: + - Protein + ion_hierarchy: + sequence_int: + order: [SEQ, MOD, CHARGE] + mapping: + SEQ: + - Peptide Sequence + MOD: + - Modified Sequence + CHARGE: + - Charge + use_iontree: False + ml_level: SEQ -fragpipe_precursors: +fragpipe_modseq: format: widetable quant_pre_or_suffix: " Intensity" protein_cols: @@ -1358,3 +1396,6 @@ fragpipe_precursors: - Modified Sequence use_iontree: False ml_level: SEQ + + + diff --git a/alphaquant/quant_reader/table_reformatter.py b/alphaquant/quant_reader/table_reformatter.py index 9c3ebf0d..6dc17d8f 100644 --- a/alphaquant/quant_reader/table_reformatter.py +++ b/alphaquant/quant_reader/table_reformatter.py @@ -72,12 +72,11 @@ def merge_protein_cols_and_config_dict( def join_columns(df, columns, separator="_"): if len(columns) == 1: - return df[columns[0]].fillna("nan").infer_objects(copy=False).astype(str) + return df[columns[0]].fillna("nan").astype(str) else: return ( df[columns] .fillna("nan") - .infer_objects(copy=False) .astype(str) .agg(separator.join, axis=1) ) diff --git a/alphaquant/tables/alphadia_reader.py b/alphaquant/tables/alphadia_reader.py index f7f421ec..63be1412 100644 --- a/alphaquant/tables/alphadia_reader.py +++ b/alphaquant/tables/alphadia_reader.py @@ -26,17 +26,23 @@ def __init__(self, fragment_matrix_file: str): fragment_matrix_file (str): Path to the fragment matrix file """ - self.ml_info_file = aq_utils.get_progress_folder_filename(fragment_matrix_file, ".ml_info_table.tsv") + self.ml_info_file = aq_utils.get_progress_folder_filename(fragment_matrix_file, ".ml_info_table.tsv.zip") + self.old_ml_info_file = aq_utils.get_progress_folder_filename(fragment_matrix_file, ".ml_info_table.tsv") self.input_file_reformat = aq_utils.get_progress_folder_filename(fragment_matrix_file, ".alphadia_fragion.aq_reformat.tsv", remove_extension=False) precursor_file = os.path.join(os.path.dirname(fragment_matrix_file), "precursors.tsv") self._precursor_df = aq_reader_utils.read_file(precursor_file, sep="\t") self._precursor2quantID = self._precursor2quantid() - if not os.path.exists(self.ml_info_file): + if os.path.exists(self.old_ml_info_file) and not os.path.exists(self.ml_info_file): + self.ml_info_file = self.old_ml_info_file + LOGGER.info(f"ML info file already exists at {self.ml_info_file}") + elif not os.path.exists(self.ml_info_file): LOGGER.info(f"Creating ML info file") self.ml_info_df = self._define_ml_info_table() - self.ml_info_df.to_csv(self.ml_info_file, sep="\t", index=False) + archive_name = os.path.basename(self.ml_info_file).removesuffix(".zip") + compression = {"method": "zip", "archive_name": archive_name} + self.ml_info_df.to_csv(self.ml_info_file, sep="\t", index=False, compression=compression) else: LOGGER.info(f"ML info file already exists at {self.ml_info_file}")