Skip to content

Commit 02445fc

Browse files
authored
Add a way to filter nodes by user agent (exclude_uagent_string) (#13)
Add a way to filter nodes by user agent (exclude_uagent_string)
2 parents 1543528 + d52e31e commit 02445fc

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

conf/export.conf.default

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,3 +14,6 @@ debug = False
1414

1515
# Relative path to directory containing timestamp-prefixed JSON export files
1616
export_dir = data/export/e3e1f3e8
17+
18+
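# Regular expression searched against each node's user agent; matching nodes are excluded from the export (note: an empty pattern matches every user agent)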
19+
exclude_uagent_string =

export.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@
3333
import os
3434
import sys
3535
import time
36+
import re
37+
3638
from binascii import hexlify, unhexlify
3739
from ConfigParser import ConfigParser
3840

@@ -48,6 +50,10 @@ def get_row(node):
4850
"""
4951
# address, port, version, user_agent, timestamp, services
5052
node = eval(node)
53+
uagent = node[3]
54+
p = re.compile(CONF['exclude_uagent_string'])
55+
if (p.search(uagent) is not None):
56+
return ''
5157
address = node[0]
5258
port = node[1]
5359
services = node[-1]
@@ -83,7 +89,8 @@ def export_nodes(nodes, timestamp):
8389
start = time.time()
8490
for node in nodes:
8591
row = get_row(node)
86-
rows.append(row)
92+
if (row != ''):
93+
rows.append(row)
8794
end = time.time()
8895
elapsed = end - start
8996
logging.info("Elapsed: %d", elapsed)
@@ -106,6 +113,7 @@ def init_conf(argv):
106113
CONF['db'] = conf.getint('export', 'db')
107114
CONF['debug'] = conf.getboolean('export', 'debug')
108115
CONF['export_dir'] = conf.get('export', 'export_dir')
116+
CONF['exclude_uagent_string'] = conf.get('export', 'exclude_uagent_string')
109117
if not os.path.exists(CONF['export_dir']):
110118
os.makedirs(CONF['export_dir'])
111119

0 commit comments

Comments
 (0)