-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtweeter.py
More file actions
107 lines (89 loc) · 2.64 KB
/
tweeter.py
File metadata and controls
107 lines (89 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
"""
Search Twitter for hashtags and dump the matching tweets into Elasticsearch
"""
import argparse
import configparser
import csv
import os
import sys
import time
from datetime import datetime
from time import mktime
import IPython
from elasticsearch import Elasticsearch
from twarc import Twarc
# Twitter API credentials are read from the "main" section of the twarc
# configuration file.
config = configparser.ConfigParser()
config.read('/home/svjethoe/.twarc')

# Name of the Elasticsearch index that receives the tweets.
index = 'visualanalytics'

# Elasticsearch client, constructed without any arguments.
es = Elasticsearch()

# Twarc client; the four credentials are passed positionally in this order.
t = Twarc(*(
    config.get('main', option)
    for option in (
        'consumer_key',
        'consumer_secret',
        'access_token',
        'access_token_secret',
    )
))
def readtokens(f='binance-subset.csv'):
    """
    Read hashtag search terms from a CSV file.

    The second column of every row is prefixed with ``#`` and returned.
    Rows without a usable second column (blank lines, single-column rows or
    an empty second field) are skipped instead of raising ``IndexError`` or
    producing a bare ``#`` term.

    :param f: path of the CSV file to read.
    :return: list of strings of the form ``#<second column>``, in file order.
    """
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation recommends for files passed to csv.reader.
    with open(f, 'r', newline='') as csvfile:
        return [
            "#%s" % row[1]
            for row in csv.reader(csvfile)
            if len(row) > 1 and row[1]
        ]
def create_index(index):
    """
    Create the Elasticsearch index *index* unless it already exists.

    The index is created with one shard, no replicas and a ``tweet`` mapping
    that declares the ``country``, ``timestamp``, ``text`` and ``url`` fields.

    :param index: name of the index to create.
    """
    # Nothing to do when the index is already present.
    if es.indices.exists(index):
        return

    tweet_properties = {
        'country': {'type': 'text'},
        'timestamp': {'type': 'date'},
        'text': {'type': 'text'},
        'url': {'type': 'text'},
    }
    body = {
        'settings': {'number_of_shards': 1, 'number_of_replicas': 0},
        'mappings': {'tweet': {'properties': tweet_properties}},
    }
    es.indices.create(index, body)
def post(tweet):
    """
    Index a single tweet into Elasticsearch.

    The document is stored in the module-level ``index`` under the tweet's
    own id, so posting the same tweet again overwrites the earlier document.

    :param tweet: tweet as a dict; ``created_at``, ``id``,
        ``user['location']`` and ``entities['hashtags']`` must be present.
    :return: the response of ``es.index``.
    :raises KeyError: if one of the required keys is missing.
    :raises ValueError: if ``created_at`` does not match the expected
        ``%a %b %d %H:%M:%S +0000 %Y`` layout.
    """
    # The first attached URL is optional: a tweet without URLs (or without
    # the expected structure) is stored with url=None.
    try:
        url = tweet['entities']['urls'][0]['url']
    except (KeyError, IndexError, TypeError):
        url = None

    created = time.strptime(tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
    ts = time.strftime('%Y-%m-%dT%H:%M:%S', created)

    # Look the text up with .get so that a missing 'full_text' key falls
    # through to the alternatives instead of raising KeyError before the
    # fallback can be tried. Per the Twitter API tweet-object docs the text
    # is in 'full_text' (extended mode), nested under 'extended_tweet'
    # (streaming) or in 'text' (compatibility mode); None is stored if all
    # three are absent.
    text = (
        tweet.get('full_text')
        or (tweet.get('extended_tweet') or {}).get('full_text')
        or tweet.get('text')
    )

    doc = {
        'url': url,
        'timestamp': ts,
        'type': 'tweet',
        'country': tweet['user']['location'],
        'tags': [tag['text'].lower() for tag in tweet['entities']['hashtags']],
        'text': text,
    }
    return es.index(
        index=index,
        doc_type='tweet',
        id=tweet['id'],
        body=doc)
def main(since="2018-02-13", until="2018-02-14"):
    """
    Search Twitter for the hashtags from the token file and index the hits.

    :param since: optional ``YYYY-MM-DD`` date added to the query as
        ``since:``; the default is the date that used to be hard-coded.
    :param until: optional ``YYYY-MM-DD`` date added to the query as
        ``until:``; the default is the date that used to be hard-coded.
        When neither date is given, the query uses ``since:<today>``.
    """
    tokens = readtokens()
    create_index(index)
    if not tokens:
        # Without hashtags the query would consist of date operators only.
        print("no hashtags to search for", file=sys.stderr)
        return

    # NOTE(review): all hashtags are OR-ed into a single query; confirm that
    # it stays within the search API's query length limit for large files.
    searchtokens = " OR ".join(tokens)

    bounds = []
    if since:
        bounds.append("since:%s" % since)
    if until:
        bounds.append("until:%s" % until)
    if not bounds:
        bounds.append("since:%s" % time.strftime("%Y-%m-%d"))
    timerange = " ".join(bounds)

    for tweet in t.search("%s %s" % (searchtokens, timerange)):
        post(tweet)
# Run the search-and-index job only when executed as a script, not on import.
if __name__ == "__main__":
    main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fdm=indent nocompatible