6
6
by genomes.yml that have already been executed and appear in the target
7
7
installed data table configuration.
8
8
"""
9
+ import json
9
10
import logging
10
11
import os
11
12
import re
38
39
from ._config_models import (
39
40
DataManager ,
40
41
DataManagers ,
42
+ DictOrValue ,
41
43
read_data_managers ,
42
44
)
43
45
from .common_parser import get_common_args
@@ -96,8 +98,7 @@ def tool_id_for(indexer: str, data_managers: DataManagers, mode: str) -> str:
96
98
class RunDataManager (BaseModel ):
97
99
id : str
98
100
items : Optional [List [Any ]] = None
99
- params : Optional [List [Any ]] = None
100
- data_table_reload : Optional [List [str ]] = None
101
+ params : Optional [DictOrValue ] = None
101
102
102
103
103
104
class RunDataManagers (BaseModel ):
@@ -172,36 +173,34 @@ def walk_over_incomplete_runs(split_options: SplitOptions):
172
173
if do_fetch and not split_options .is_build_complete (build_id , fetch_indexer ):
173
174
log .info (f"Fetching: { build_id } " )
174
175
fetch_tool_id = tool_id_for (fetch_indexer , data_managers , split_options .tool_id_mode )
175
- fetch_params = []
176
- fetch_params .append ({"dbkey_source|dbkey_source_selector" : "new" })
177
- fetch_params .append ({"dbkey_source|dbkey" : genome ["id" ]})
178
176
description = genome .get ("description" )
177
+ fetch_params = {
178
+ "dbkey_source" : {"dbkey_source_selector" : "new" , "dbkey" : genome ["id" ]},
179
+ "sequence_id" : genome ["id" ],
180
+ "sequence_name" : description ,
181
+ }
179
182
source = genome .get ("source" )
180
183
if source == "ucsc" :
181
184
if not description :
182
- description = ucsc_description_for_build (genome ["id" ])
183
- fetch_params .append ({"reference_source|reference_source_selector" : "ucsc" })
184
- fetch_params .append ({"reference_source|requested_dbkey" : genome ["id" ]})
185
- fetch_params .append ({"sequence_name" : description })
185
+ fetch_params ["sequence_name" ] = ucsc_description_for_build (genome ["id" ])
186
+ fetch_params ["reference_source" ] = {
187
+ "reference_source_selector" : "ucsc" ,
188
+ "requested_dbkey" : genome ["id" ],
189
+ }
186
190
elif re .match ("^[A-Z_]+[0-9.]+" , source ):
187
- fetch_params . append ({ "reference_source|reference_source_selector" : "ncbi" })
188
- fetch_params . append ({ "reference_source|requested_identifier " : source })
189
- fetch_params . append ({ "sequence_name " : genome [ "description" ]})
190
- fetch_params . append ({ "sequence.id" : genome [ "id" ]})
191
+ fetch_params [ "reference_source" ] = {
192
+ "reference_source_selector " : "ncbi" ,
193
+ "requested_identifier " : source ,
194
+ }
191
195
elif re .match ("^http" , source ):
192
- fetch_params .append ({"reference_source|reference_source_selector" : "url" })
193
- fetch_params .append ({"reference_source|user_url" : source })
194
- fetch_params .append ({"sequence_name" : genome ["description" ]})
195
- fetch_params .append ({"sequence.id" : genome ["id" ]})
196
+ fetch_params ["reference_source" ] = {"reference_source_selector" : "url" , "user_url" : source }
196
197
197
198
if description :
198
- fetch_params . append ({ "dbkey_source| dbkey_name" : description })
199
+ fetch_params [ "dbkey_source" ][ " dbkey_name"] = description
199
200
200
201
fetch_run_data_manager = RunDataManager (
201
202
id = fetch_tool_id ,
202
203
params = fetch_params ,
203
- # Not needed according to Marius
204
- # data_table_reload=["all_fasta", "__dbkeys__"],
205
204
)
206
205
yield (build_id , fetch_indexer , fetch_run_data_manager )
207
206
else :
@@ -223,18 +222,17 @@ def walk_over_incomplete_runs(split_options: SplitOptions):
223
222
224
223
tool_id = tool_id_for (indexer , data_managers , split_options .tool_id_mode )
225
224
data_manager = data_managers .__root__ [indexer ]
226
- params = data_manager .parameters
225
+ params = {}
226
+ if data_manager .parameters :
227
+ params = json .loads (data_manager .parameters .json ()) or {}
228
+ genome_params = genome .pop ("parameters" , None ) or {}
229
+ params .update (genome_params )
227
230
if params is None :
228
- params = [
229
- {"all_fasta_source" : "{{ item.id }}" },
230
- {"sequence_name" : "{{ item.name }}" },
231
- {"sequence_id" : "{{ item.id }}" },
232
- ]
233
- # why is this not pulled from the data managers conf? -nate
234
- if re .search ("bwa" , tool_id ):
235
- params .append ({"index_algorithm" : "bwtsw" })
236
- if re .search ("color_space" , tool_id ):
237
- continue
231
+ params = {
232
+ "all_fasta_source" : "{{ item.id }}" ,
233
+ "sequence_name" : "{{ item.name }}" ,
234
+ "sequence_id" : "{{ item.id }}" ,
235
+ }
238
236
239
237
item = deepcopy (genome )
240
238
item .pop ("indexers" , None )
0 commit comments