From 83a6935314ce33f83411787254512d3ddbddc71f Mon Sep 17 00:00:00 2001 From: "Davidelvis,user.email" Date: Tue, 30 Jun 2020 17:11:06 +0300 Subject: [PATCH 1/2] Update in contributors list --- contributors/David_Elvis.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 contributors/David_Elvis.txt diff --git a/contributors/David_Elvis.txt b/contributors/David_Elvis.txt new file mode 100644 index 0000000..540ff51 --- /dev/null +++ b/contributors/David_Elvis.txt @@ -0,0 +1 @@ +DAVID_ELVIS From eb1b112d79016b34740b9b3704e3886061ce99e7 Mon Sep 17 00:00:00 2001 From: "Davidelvis,user.email" Date: Wed, 1 Jul 2020 08:29:29 +0300 Subject: [PATCH 2/2] mistakes corrected in twitter_mining --- twitter_mining/twitter_mining.ipynb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/twitter_mining/twitter_mining.ipynb b/twitter_mining/twitter_mining.ipynb index a6460ee..5f58a93 100644 --- a/twitter_mining/twitter_mining.ipynb +++ b/twitter_mining/twitter_mining.ipynb @@ -161,7 +161,7 @@ " def clean_tweets(self,twitter_text):\n", "\n", " #use pre processor\n", - " tweet = p.clean(twitter_text)\n", + " tweet = ppr.clean(twitter_text)\n",#the preprocessor library was imported as ppr, not p "\n", " #HappyEmoticons\n", " emoticons_happy = set([\n", @@ -193,7 +193,7 @@ " emoticons = emoticons_happy.union(emoticons_sad)\n", "\n", " stop_words = set(stopwords.words('english'))\n", - " word_tokens = word_tokenize(tweet)\n", + " word_tokens = word_tokenize(tweet)\n", #word_tokenize was not imported from the nltk library " #after tweepy preprocessing the colon symbol left remain after \n", " #removing mentions\n", " tweet = re.sub(r':', '', tweet)\n", @@ -229,7 +229,7 @@ " \n", "\n", " #page attribute in tweepy.cursor and iteration\n", - " for page in tweepy.Cursor(api.search, q=keyword,count=200, include_rts=False):\n", + " for page in tweepy.Cursor(self.api.search, q=keyword,count=200, include_rts=False).pages():\n", #it is self.api.search not api.search and 
call .pages() to iterate page by page "\n", "\n", " for status in page:\n", @@ -253,6 +253,8 @@ " df.at[i, 'retweet_count'] = status['retweet_count']\n", " continue\n", "\n", + tweet_text = status['text'] # first extract the tweet text, then filter it by calling self.clean_tweets() + filtered_tweet = self.clean_tweets(tweet_text) " #calculate sentiment\n", " blob = TextBlob(filtered_tweet)\n", " Sentiment = blob.sentiment \n", @@ -327,7 +329,7 @@ "\n", "#get data on keywords\n", "ts = tweetsearch()\n", - "df = ts.get_tweets(covid_keywords, csvfile=tweets_file) #you saved the " + "df = ts.get_tweets(covid_keywords, csvfile=tweets_file) #the method is named get_tweets, not get_data " ] }, {