Skip to content

Commit adc53be

Browse files
committed
Get intergenic regions form a GenBank file. Return as a multi-fasta file
Reverse-complement those that need to be revcomped.
1 parent 76081aa commit adc53be

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

get_intergenic.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python
2+
import sys
3+
import Bio
4+
from Bio import SeqIO, SeqFeature
5+
from Bio.SeqRecord import SeqRecord
6+
import os
7+
# Copyright(C) 2009 Iddo Friedberg & Ian MC Fleming
8+
# Released under Biopython license. http://www.biopython.org/DIST/LICENSE
9+
# Do not remove this comment
10+
def get_interregions(genbank_path,intergene_length=100):
11+
seq_record = SeqIO.parse(open(genbank_path), "genbank").next()
12+
13+
cds_list = []
14+
intergenic_records = []
15+
# Loop over the genome file, get the
16+
for fnum, feature in enumerate(seq_record.features):
17+
if feature.type == 'CDS':
18+
mystart = feature.location._start.position
19+
myend = feature.location._end.position
20+
cds_list.append((mystart,myend,feature.strand))
21+
for i,pospair in enumerate(cds_list[1:]):
22+
# Compare current start position to previous end position
23+
last_end = cds_list[i][1]
24+
this_start = pospair[0]
25+
strand = pospair[2]
26+
if this_start - last_end >= intergene_length:
27+
intergene_seq = seq_record.seq[last_end:this_start]
28+
if strand == -1:
29+
intergene_seq = intergene_seq.reverse_complement()
30+
strand_string = "-"
31+
else:
32+
strand_string = "+"
33+
intergenic_records.append(
34+
SeqRecord(intergene_seq,id="%s-ign-%d" % (seq_record.name,i),
35+
description="%s %d-%d %s" % (seq_record.name, last_end+1,
36+
this_start,strand_string)))
37+
outpath = os.path.splitext(os.path.basename(genbank_path))[0] + ".ign"
38+
SeqIO.write(intergenic_records, open(outpath,"w"), "fasta")
39+
40+
41+
if __name__ == '__main__':
42+
if len(sys.argv) == 2:
43+
get_interregions(sys.argv[1])
44+
elif len(sys.argv) == 3:
45+
get_interregions(sys.argv[1],int(sys.argv[2]))
46+
else:
47+
print "Usage: get_intergenic.py gb_file [intergenic_length]"
48+
sys.exit(0)
49+

0 commit comments

Comments
 (0)