Skip to content

Commit 259e7bb

Browse files
committed
flexible disk space allocation for RNA-seq
1 parent 7259953 commit 259e7bb

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
[tool.poetry]
66
name = "Benchmark-4dn"
7-
version = "0.5.25"
7+
version = "0.5.26"
88
description = """Benchmark functions that returns total space, mem, cpu given \
99
input size and parameters for the CWL workflows"""
1010
authors = ["Soo Lee <[email protected]>"]

src/Benchmark/bfunctions.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,19 @@
44

55
def encode_rnaseq_stranded(input_json):
66
assert 'input_size_in_bytes' in input_json
7-
r = BenchmarkResult(size=300,
7+
insz = input_json['input_size_in_bytes']
8+
input_sizes = 0
9+
for file,file_size in insz.items():
10+
input_sizes += file_size
11+
# The starIndex file is the main input size difference, meaning
12+
# disk requirements vary by organism. 24 GB+ is usually mouse and
13+
# < 24 GB is usually human. As human sets approach 24 GB, the disk
14+
# requirement is less predictable, so disk may need to be set manually.
15+
if B2GB(input_sizes) < 24:
16+
total_size_in_gb = B2GB((input_sizes) * 7) + 21
17+
else:
18+
total_size_in_gb = B2GB((input_sizes) * 10.1) - 150
19+
r = BenchmarkResult(size=total_size_in_gb,
820
mem=GB2MB(64),
921
cpu=16,
1022
exclude_t=True)
@@ -13,7 +25,21 @@ def encode_rnaseq_stranded(input_json):
1325

1426
def encode_rnaseq_unstranded(input_json):
1527
assert 'input_size_in_bytes' in input_json
16-
r = BenchmarkResult(size=300,
28+
insz = input_json['input_size_in_bytes']
29+
assert 'rna.align_index' in insz
30+
input_sizes = 0
31+
for file,file_size in insz.items():
32+
# As in stranded case, align index is the main contributor,
33+
# but now total file size cannot determine organism.
34+
# Instead, use the size of the align index itself
35+
if file == 'rna.align_index':
36+
mouse = B2GB(file_size) > 13
37+
input_sizes += file_size
38+
if mouse:
39+
total_size_in_gb = B2GB((input_sizes) * 8) - 105
40+
else:
41+
total_size_in_gb = B2GB((input_sizes) * 5.5) + 35
42+
r = BenchmarkResult(size=total_size_in_gb,
1743
mem=GB2MB(64),
1844
cpu=16,
1945
exclude_t=True)

tests/test.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,27 @@ class TestBenchmark(unittest.TestCase):
2121
def test_encode_rnaseq_unstranded(self):
2222
print("rnaseq_unstranded")
2323
input_json = {'input_size_in_bytes': {'rna.fastqs_R1': GB2B(10),
24-
'rna.align_index': GB2B(3)}}
24+
'rna.align_index': GB2B(10)}}
2525
res = B.benchmark('encode-rnaseq-unstranded', input_json)
2626
print(res)
2727
assert 'aws' in res
2828
assert 'recommended_instance_type' in res['aws']
2929
assert res['aws']['recommended_instance_type'] == 'm5a.4xlarge'
30+
assert 'total_size_in_GB' in res
31+
assert int(res['total_size_in_GB']) == 145
3032

3133

3234
def test_encode_rnaseq_stranded(self):
3335
print("rnaseq_stranded")
3436
input_json = {'input_size_in_bytes': {'rna.fastqs_R1': GB2B(10),
35-
'rna.align_index': GB2B(3)}}
37+
'rna.align_index': GB2B(10)}}
3638
res = B.benchmark('encode-rnaseq-stranded', input_json)
3739
print(res)
3840
assert 'aws' in res
3941
assert 'recommended_instance_type' in res['aws']
4042
assert res['aws']['recommended_instance_type'] == 'm5a.4xlarge'
43+
assert 'total_size_in_GB' in res
44+
assert int(res['total_size_in_GB']) == 161
4145

4246

4347
def test_repliseq(self):

0 commit comments

Comments
 (0)