@@ -74,22 +74,20 @@ def convert_maxquant_psm_cmd(
74
74
if not all ([msms_file , output_folder ]):
75
75
raise click .UsageError ("ERROR: Please provide all required parameters" )
76
76
77
- # Ensure output directory exists
78
77
output_folder = Path (output_folder )
79
78
output_folder .mkdir (parents = True , exist_ok = True )
80
79
logger .info (f"Using output directory: { output_folder } " )
81
80
82
- # Set default prefix if not provided
83
81
prefix = output_prefix or "psm"
84
82
filename = create_uuid_filename (prefix , ".psm.parquet" )
85
83
output_path = output_folder / filename
86
84
logger .info (f"Will save PSM file as: { filename } " )
87
85
88
86
logger .info ("Initializing MaxQuant PSM converter..." )
89
- mq = MaxQuant ()
87
+ processor = MaxQuant ()
90
88
91
89
logger .info (f"Starting PSM conversion (batch size: { batch_size :,} )..." )
92
- mq . write_psm_to_file (
90
+ processor . process_psm_file (
93
91
msms_path = str (msms_file ), output_path = str (output_path ), chunksize = batch_size
94
92
)
95
93
logger .info (f"PSM file successfully saved to: { output_path } " )
@@ -126,6 +124,11 @@ def convert_maxquant_psm_cmd(
126
124
help = "Protein file with specific requirements" ,
127
125
type = click .Path (exists = True , dir_okay = False , path_type = Path ),
128
126
)
127
+ @click .option (
128
+ "--protein-groups-file" ,
129
+ help = "MaxQuant proteinGroups.txt file for Q-value mapping (optional)" ,
130
+ type = click .Path (exists = True , dir_okay = False , path_type = Path ),
131
+ )
129
132
@click .option (
130
133
"--partitions" ,
131
134
help = "Field(s) used for splitting files (comma-separated)" ,
@@ -146,6 +149,7 @@ def convert_maxquant_feature_cmd(
146
149
sdrf_file : Path ,
147
150
output_folder : Path ,
148
151
protein_file : Optional [Path ],
152
+ protein_groups_file : Optional [Path ],
149
153
partitions : Optional [str ],
150
154
batch_size : int ,
151
155
output_prefix : Optional [str ],
@@ -162,6 +166,7 @@ def convert_maxquant_feature_cmd(
162
166
--evidence-file evidence.txt \\
163
167
--sdrf-file data.sdrf.tsv \\
164
168
--output-folder ./output \\
169
+ --protein-groups-file proteinGroups.txt \\
165
170
--batch-size 1000000
166
171
"""
167
172
logger = get_logger ("quantmsio.commands.maxquant" )
@@ -173,44 +178,43 @@ def convert_maxquant_feature_cmd(
173
178
if not all ([evidence_file , sdrf_file , output_folder ]):
174
179
raise click .UsageError ("ERROR: Please provide all required parameters" )
175
180
176
- # Ensure output directory exists
177
181
output_folder = Path (output_folder )
178
182
output_folder .mkdir (parents = True , exist_ok = True )
179
183
logger .info (f"Using output directory: { output_folder } " )
180
184
181
- # Set default prefix if not provided
182
185
prefix = output_prefix or "feature"
183
186
filename = create_uuid_filename (prefix , ".feature.parquet" )
184
187
output_path = output_folder / filename
185
188
logger .info (f"Will save feature file as: { filename } " )
186
189
187
190
logger .info ("Initializing MaxQuant feature converter..." )
188
- mq = MaxQuant ()
191
+ processor = MaxQuant ()
189
192
190
193
if not partitions :
191
194
logger .info (f"Starting feature conversion (batch size: { batch_size :,} )..." )
192
- mq .write_feature_to_file (
195
+
196
+ if protein_groups_file :
197
+ logger .info (
198
+ f"Using proteinGroups file for Q-value mapping: { protein_groups_file } "
199
+ )
200
+ processor ._init_protein_group_qvalue_mapping (str (protein_groups_file ))
201
+ else :
202
+ logger .info (
203
+ "No proteinGroups file provided, auto-detection will be used"
204
+ )
205
+
206
+ processor .process_feature_file (
193
207
evidence_path = str (evidence_file ),
194
- sdrf_path = str (sdrf_file ),
195
208
output_path = str (output_path ),
196
- chunksize = batch_size ,
209
+ sdrf_path = str ( sdrf_file ) ,
197
210
protein_file = str (protein_file ) if protein_file else None ,
211
+ chunksize = batch_size ,
198
212
)
199
213
logger .info (f"Feature file successfully saved to: { output_path } " )
200
214
else :
201
- logger .info (f"Starting partitioned feature conversion using: { partitions } " )
202
- partition_list = partitions .split ("," )
203
- mq .write_features_to_file (
204
- evidence_path = str (evidence_file ),
205
- sdrf_path = str (sdrf_file ),
206
- output_folder = str (output_folder ),
207
- filename = filename ,
208
- partitions = partition_list ,
209
- chunksize = batch_size ,
210
- protein_file = str (protein_file ) if protein_file else None ,
211
- )
212
- logger .info (
213
- f"Partitioned feature files successfully saved to: { output_folder } "
215
+ logger .error ("Partitioned conversion not implemented" )
216
+ raise click .ClickException (
217
+ "Partitioned conversion feature is not yet available. Please use the standard conversion without --partitions."
214
218
)
215
219
216
220
except Exception as e :
@@ -240,14 +244,9 @@ def convert_maxquant_feature_cmd(
240
244
required = True ,
241
245
type = click .Path (file_okay = False , path_type = Path ),
242
246
)
243
- @click .option (
244
- "--protein-file" ,
245
- help = "Protein file with specific requirements" ,
246
- type = click .Path (exists = True , dir_okay = False , path_type = Path ),
247
- )
248
247
@click .option (
249
248
"--batch-size" ,
250
- help = "Read batch size" ,
249
+ help = "Batch size (for logging purposes only) " ,
251
250
default = 1000000 ,
252
251
type = int ,
253
252
)
@@ -260,7 +259,6 @@ def convert_maxquant_pg_cmd(
260
259
protein_groups_file : Path ,
261
260
sdrf_file : Path ,
262
261
output_folder : Path ,
263
- protein_file : Optional [Path ],
264
262
batch_size : int ,
265
263
output_prefix : Optional [str ],
266
264
verbose : bool = False ,
@@ -287,29 +285,25 @@ def convert_maxquant_pg_cmd(
287
285
if not all ([protein_groups_file , sdrf_file , output_folder ]):
288
286
raise click .UsageError ("ERROR: Please provide all required parameters" )
289
287
290
- # Ensure output directory exists
291
288
output_folder = Path (output_folder )
292
289
output_folder .mkdir (parents = True , exist_ok = True )
293
290
logger .info (f"Using output directory: { output_folder } " )
294
291
295
- # Set default prefix if not provided
296
292
prefix = output_prefix or "pg"
297
293
filename = create_uuid_filename (prefix , ".pg.parquet" )
298
294
output_path = output_folder / filename
299
295
logger .info (f"Will save protein groups file as: { filename } " )
300
296
301
297
logger .info ("Initializing MaxQuant protein groups converter..." )
302
- mq = MaxQuant ()
298
+ processor = MaxQuant ()
303
299
304
300
logger .info (
305
301
f"Starting protein groups conversion (batch size: { batch_size :,} )..."
306
302
)
307
- mq . write_protein_groups_to_file (
303
+ processor . process_pg_file (
308
304
protein_groups_path = str (protein_groups_file ),
309
- sdrf_path = str (sdrf_file ),
310
305
output_path = str (output_path ),
311
- chunksize = batch_size ,
312
- protein_file = str (protein_file ) if protein_file else None ,
306
+ sdrf_path = str (sdrf_file ),
313
307
)
314
308
logger .info (f"Protein groups file successfully saved to: { output_path } " )
315
309
@@ -318,3 +312,7 @@ def convert_maxquant_pg_cmd(
318
312
f"Error in MaxQuant protein groups conversion: { str (e )} " , exc_info = True
319
313
)
320
314
raise click .ClickException (f"Error: { str (e )} \n Check the logs for more details." )
315
+
316
+
317
+ if __name__ == "__main__" :
318
+ convert ()
0 commit comments