-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextractEggnogDescription.py
More file actions
91 lines (60 loc) · 2.03 KB
/
extractEggnogDescription.py
File metadata and controls
91 lines (60 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
'''
@author: Samaneh
'''
from Bio import SeqIO
import pandas as pd
import numpy as np
#import xlsxwriter
import re
import csv
import os
import json
#################################################################################
def extraction(filteredClusters):
    """Collect the identifiers of every ``>>`` entry in a cluster listing.

    Args:
        filteredClusters: Open text file (anything exposing ``readlines()``)
            whose entry lines contain the marker ``>>``.

    Returns:
        A list with one item per line containing ``>>``, in file order.
        Each item is the text before the first ``|`` on that line, with
        leading ``>`` characters and trailing spaces removed.
    """
    # str.lstrip takes a *set* of characters, so ">" removes the whole run
    # of leading ">" markers.
    return [
        entry.split("|")[0].lstrip(">").rstrip(" ")
        for entry in filteredClusters.readlines()
        if ">>" in entry
    ]
def descriptionExtraction(annotationFile):
    """Build a lookup table from a tab-separated annotation file.

    For every row, the second column (index 1) becomes the key and the
    sixth column (index 5) becomes the value; both are stripped of
    surrounding whitespace. Presumably these are the eggNOG group name and
    its functional description -- confirm against the annotation file format.

    Rows with fewer than six tab-separated fields (including blank lines)
    are skipped instead of raising ``IndexError``. When a key occurs more
    than once, the last occurrence wins.

    Args:
        annotationFile: Open text file, or any iterable of text lines.

    Returns:
        dict mapping column-1 values to column-5 values.
    """
    descsDic = dict()
    # Iterate lazily so a large annotation file is never held in memory whole.
    for row in annotationFile:
        fields = row.split("\t")
        if len(fields) < 6:
            # Blank or truncated row: no description column to read.
            continue
        descsDic[fields[1].strip()] = fields[5].strip()
    return descsDic
def generate(filteredDescs, descsDic, idList,
             outputPath="/home/samaneh/eggNOG/output/eggnogClusterDescriptions_onEggNOG.txt"):
    """Write, per cluster, the selected entries paired with the cluster description.

    The input is scanned line by line and each line is classified by the
    first matching rule:

    * contains ``" :"``  -> cluster header; the cluster name is the line with
      every ``" :"`` removed and surrounding spaces/newline stripped. The
      entries collected for the previous cluster are discarded.
    * contains ``">>"``  -> entry line; the identifier is the text before the
      first ``|`` without leading ``>`` and trailing spaces. It is kept only
      if it appears in ``idList``.
    * contains ``"#"``   -> flush; the current cluster header, its kept
      entries (``>>id | description``) and a ``**************`` separator are
      written to the output file.

    Any other line is ignored. Entry and flush lines that appear before the
    first cluster header are ignored as well (there is no cluster to attach
    them to).

    NOTE(review): because the header rule is tested first, an entry line whose
    text happens to contain " :" is treated as a cluster header -- confirm the
    input never contains such lines.

    Args:
        filteredDescs: Open text file, or any iterable of text lines.
        descsDic: dict mapping cluster name -> description string.
        idList: Collection of identifiers to keep.
        outputPath: Destination file; defaults to the original hard-coded
            location so existing callers are unaffected.

    Raises:
        KeyError: if a kept entry belongs to a cluster missing from ``descsDic``.
    """
    # Set membership is O(1); the list was being scanned for every entry line.
    wantedIds = set(idList)
    clusterName = None
    dualDic = dict()
    # The context manager guarantees the output is closed even when a
    # KeyError aborts processing part-way through.
    with open(outputPath, "w") as resFile:
        for line in filteredDescs:
            if " :" in line:
                # New cluster: reset the per-cluster state before storing its name.
                dualDic.clear()
                clusterName = line.replace(" :", "").strip(" ").strip("\n").strip(" ")
            elif clusterName is None:
                # Nothing can be attributed to a cluster yet.
                continue
            elif ">>" in line:
                uniprotId = line.split("|")[0].lstrip(">").rstrip(" ")
                if uniprotId in wantedIds:
                    dualDic[uniprotId] = descsDic[clusterName]
            elif "#" in line:
                # Flush the current cluster to the file.
                parts = [clusterName + " :\n"]
                parts.extend(
                    ">>" + entryId + " | " + entryDesc + "\n"
                    for entryId, entryDesc in dualDic.items()
                )
                parts.append("\n**************" + "\n")
                resFile.write("".join(parts))
def handler():
    """Run the full pipeline on the hard-coded eggNOG input files.

    Opens the filtered descriptions file, the filtered cluster listing and
    the annotation table, then:

    1. collects the identifiers from the cluster listing (``extraction``),
    2. builds the cluster-name -> description table (``descriptionExtraction``),
    3. writes the combined result (``generate``).

    All three inputs are opened in one ``with`` statement so they are closed
    even if a processing step raises.
    """
    with open("/home/samaneh/eggNOG/output/eggNOG_clustering_descriptions_filtered.txt") as filteredDescs, \
            open("/home/samaneh/eggNOG/output/ClusterDescriptions_onEggNOG.txt") as filteredClusters, \
            open("/home/samaneh/eggNOG/data/NOG.annotations.tsv", "r") as annotationFile:
        idList = extraction(filteredClusters)
        descsDic = descriptionExtraction(annotationFile)
        generate(filteredDescs, descsDic, idList)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    handler()