File tree Expand file tree Collapse file tree 2 files changed +25
-2
lines changed Expand file tree Collapse file tree 2 files changed +25
-2
lines changed Original file line number Diff line number Diff line change @@ -136,7 +136,8 @@ def get_md5(path):
136
136
start = time .time ()
137
137
status = branch .do_fastmultigather (
138
138
outpath ,
139
- "prepare-db/animals-and-gtdb.rocksdb" ,
139
+ # "prepare-db/animals-and-gtdb.rocksdb",
140
+ "prepare-db/plants+animals+gtdb.rocksdb" ,
140
141
0 ,
141
142
KSIZE ,
142
143
SCALED ,
@@ -157,7 +158,7 @@ def get_md5(path):
157
158
158
159
# load/process
159
160
gather_df = pd .read_csv (csv_filename )
160
- gather_df = gather_df [gather_df ["f_unique_weighted" ] >= 0.1 ]
161
+ gather_df = gather_df [gather_df ["f_unique_weighted" ] >= 0.001 ]
161
162
if len (gather_df ):
162
163
last_row = gather_df .tail (1 ).squeeze ()
163
164
sum_weighted_found = last_row ["sum_weighted_found" ]
Original file line number Diff line number Diff line change @@ -11,6 +11,7 @@ GENOMES = ['bosTau9',
11
11
rule all :
12
12
input :
13
13
'animals-and-gtdb.rocksdb' ,
14
+ 'plants+animals+gtdb.rocksdb' ,
14
15
15
16
rule index_animals_and_gtdb :
16
17
input :
@@ -32,6 +33,27 @@ rule animals_and_gtdb:
32
33
"""
33
34
34
35
36
+ rule index_plants_animals_and_gtdb :
37
+ input :
38
+ "plants+animals+gtdb.mf.csv" ,
39
+ output :
40
+ protected (directory ("plants+animals+gtdb.rocksdb" )),
41
+ shell : """
42
+ sourmash scripts index {input} -o {output} -k 51
43
+ """
44
+
45
+ rule plants_animals_and_gtdb :
46
+ input :
47
+ expand ('outputs/{g}.k51.100k.sig.zip' , g = GENOMES ),
48
+ 'outputs/all-gtdb-rs220.k51.100k.sig.zip' ,
49
+ 'downloads/genbank-plants-merged-2024.07.sig.zip' ,
50
+ output :
51
+ 'plants+animals+gtdb.mf.csv' ,
52
+ shell : """
53
+ sourmash sig collect -F csv {input} -o {output} --abspath
54
+ """
55
+
56
+
35
57
rule downsample :
36
58
input :
37
59
"downloads/{g}.sig.zip" ,
You can’t perform that action at this time.
0 commit comments