diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/README.md b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/README.md new file mode 100644 index 000000000..cb2c45705 --- /dev/null +++ b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/README.md @@ -0,0 +1,77 @@

# Twitter Topic Modeling and Sentiment Analysis

A Flask web app with Twitter OAuth for analysing a user's tweet topics and their sentiment.

The user logs in with their Twitter account.

The application retrieves the user's tweets and applies an unsupervised NLP clustering technique known as topic modeling to group the tweets into topics. The model used here is the Biterm Topic Model (BTM), which is well suited to short texts such as tweets.

The application also performs sentiment analysis on each tweet using the VADER sentiment analysis module.

## Prerequisites

* Clone the [BTM repo](https://github.com/markoarnauto/biterm) and copy the biterm folder from it into the current folder.

* Create a Twitter developer account.

* Create an app from the [developer's home page](https://developer.twitter.com/en/apps).

* Fill in the app details as shown; you may change the other fields, but keep the callback URL exactly as below.
```
Callback URL : http://127.0.0.1:5000/login/twitter/authorized
```

![twitter-app-details](images/twitter-app-details.JPG)

* From this page go to "Keys and tokens", copy your tokens, and paste them into app.py.
```
twitter_blueprint = make_twitter_blueprint(
    api_key="", api_secret="")
```

The Website URL can be any placeholder URL.

* Install the dependencies
```
pip install -r requirements.txt
```


## Usage
* In the main directory, run the following command
```
python app.py
```
* Open the URL printed in the terminal.
In most cases it is:
```
http://127.0.0.1:5000/
```

* The app is now running locally.

* Log in with your Twitter credentials

![login](images/app-twitter-oauth.JPG)

* Enter your profile display name/ID.

## Screenshots

* Final user dashboard

![user dashboard](images/twitter-app-dashboard.JPG)

1. Your username/display name

2. Topics clustered by BTM

3. Each tweet classified into one of the three topics.

4. VADER sentiment for each tweet.

Each negative tweet is highlighted with a grey background.
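## How the sentiment labels are computed

`sentiment.py` labels each tweet from VADER's compound score using the conventional ±0.05 thresholds. The snippet below is a minimal standalone sketch of that mapping, not part of the app itself; the example tweets are made up, and it only assumes `vaderSentiment` is installed from requirements.txt.
```
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()

# Hypothetical example tweets, just to illustrate the thresholds used in sentiment.py
for text in ["I love this new feature!",
             "This outage is really frustrating.",
             "Meeting at 5 pm."]:
    compound = analyser.polarity_scores(text)['compound']
    if compound >= 0.05:
        label = "Positive"
    elif compound <= -0.05:
        label = "Negative"
    else:
        label = "Neutral"
    print(f"{label}: {text}")
```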
## Author

Priya Mane

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/btm_model.cpython-37.pyc b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/btm_model.cpython-37.pyc new file mode 100644 index 000000000..b8fdb4bc5 Binary files /dev/null and b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/btm_model.cpython-37.pyc differ diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/sentiment.cpython-37.pyc b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/sentiment.cpython-37.pyc new file mode 100644 index 000000000..48a94be35 Binary files /dev/null and b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/sentiment.cpython-37.pyc differ diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/text_cleaning.cpython-37.pyc b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/text_cleaning.cpython-37.pyc new file mode 100644 index 000000000..6737d273b Binary files /dev/null and b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/__pycache__/text_cleaning.cpython-37.pyc differ

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/app.py b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/app.py new file mode 100644 index 000000000..19e795148 --- /dev/null +++ b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/app.py @@ -0,0 +1,75 @@

from flask import Flask, redirect, url_for, render_template
from flask_dance.contrib.twitter import make_twitter_blueprint, twitter
import requests
import btm_model
import text_cleaning
import sentiment

app = Flask(__name__)
app.config['SECRET_KEY'] = "youareawesomethiscanbeanything"

twitter_blueprint = make_twitter_blueprint(
    api_key="", api_secret="")

app.register_blueprint(twitter_blueprint, url_prefix='/login')


@app.route('/')
def index():
    # Home page
    # If the user is not authorized, redirect to the Twitter login page
    if not twitter.authorized:
        return redirect(url_for('twitter.login'))
    return redirect(url_for('twitter_login'))


@app.route('/twitter')
def twitter_login():
    # If the user is not authorized, redirect to the Twitter login page
    if not twitter.authorized:
        return redirect(url_for('twitter.login'))
    # If the user is authorized, retrieve their account details
    account_info = twitter.get('account/settings.json')
    # ... and their tweets
    user_tweets = twitter.get("statuses/user_timeline.json")

    # If the account information is retrieved successfully, analyse and display it
    if account_info.ok:
        # Convert the retrieved responses to JSON
        user_tweets_json = user_tweets.json()
        account_info_json = account_info.json()

        # Get the tweet text from the returned objects
        all_tweets = []
        for tweet in user_tweets_json:
            all_tweets.append(tweet['text'])

        # Text cleaning for tweets
        all_tweets_cleaned = text_cleaning.clean_tweets(all_tweets)

        # BTM model for topic modeling results
        classified_tweets, topics = btm_model.categorize(all_tweets_cleaned)

        # Sentiment analysis
        tweet_sentiment = sentiment.get_sentiment(all_tweets_cleaned)

        # Prepare the data to be rendered on the user dashboard template
        data = {
            "all_tweets": all_tweets,
            "account_info_json": account_info_json,
            "classified_tweets": classified_tweets,
            "topics": topics,
            "sentiment": tweet_sentiment
        }
        # Render the template with the user's data
        return render_template('user_dash.html', data=data)

    # If the account info could not be retrieved, return an error message.
    return 'Error: could not retrieve account information.'


if __name__ == '__main__':
    app.run(debug=True)

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/btm_model.py b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/btm_model.py new file mode 100644 index 000000000..ad64f8969 --- /dev/null +++ b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/btm_model.py @@ -0,0 +1,42 @@

import numpy as np
from biterm.biterm.btm import oBTM
from sklearn.feature_extraction.text import CountVectorizer
from biterm.biterm.utility import vec_to_biterms, topic_summuary


def categorize(tweets_list, number_of_topics=3):

    # Vectorize the tweets
    vec = CountVectorizer(stop_words='english')
    X = vec.fit_transform(tweets_list).toarray()

    # Get the vocabulary
    vocab = np.array(vec.get_feature_names())

    # Get the biterms (word pairs co-occurring within a tweet)
    biterms = vec_to_biterms(X)

    # Create the online BTM model
    btm = oBTM(num_topics=number_of_topics, V=vocab)

    # Train the online BTM on chunks of 100 tweets
    for i in range(0, len(biterms), 100):
        biterms_chunk = biterms[i:i + 100]
        btm.fit(biterms_chunk, iterations=50)
    topics = btm.transform(biterms)

    # Topic coherence summary (top words per topic)
    res = topic_summuary(btm.phi_wz.T, X, vocab, 6)

    topics_top_words = res['top_words']

    topic_classification = []

    # Assign each tweet to its most probable topic
    for i in range(len(tweets_list)):
        topic_classification.append(topics[i].argmax())

    return topic_classification, topics_top_words

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/app-twitter-oauth.JPG b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/app-twitter-oauth.JPG new file mode 100644 index 000000000..f06b507d2 Binary files /dev/null and b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/app-twitter-oauth.JPG differ diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/twitter-app-dashboard.JPG b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/twitter-app-dashboard.JPG new file mode 100644 index 000000000..9a1afcaa5 Binary files /dev/null and b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/twitter-app-dashboard.JPG differ diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/twitter-app-details.JPG b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/twitter-app-details.JPG new file mode 100644 index 000000000..ff57e4a65 Binary files /dev/null and b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/images/twitter-app-details.JPG differ

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/requirements.txt b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/requirements.txt new file mode 100644 index 000000000..2589a86fd --- /dev/null +++ b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/requirements.txt @@ -0,0 +1,7 @@

flask
requests
Flask-Dance
numpy
scikit-learn
vaderSentiment
nltk

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/sentiment.py b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/sentiment.py new file mode 100644 index 000000000..2ac3561c4 --- /dev/null +++ b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/sentiment.py @@ -0,0 +1,24 @@

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


def get_sentiment(tweets):
    # Define the analyser object
    analyser = SentimentIntensityAnalyzer()
    sentiment = []

    for t in tweets:
        # Get the polarity scores for each tweet.
        # sentiment_dict holds scores for pos, neg, neu, and a compound score (-1 to 1)
        sentiment_dict = analyser.polarity_scores(t)

        # The compound score determines the overall tweet sentiment
        if sentiment_dict['compound'] >= 0.05:
            sentiment.append("Positive")

        elif sentiment_dict['compound'] <= -0.05:
            sentiment.append("Negative")

        else:
            sentiment.append("Neutral")

    return sentiment

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/templates/user_dash.html b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/templates/user_dash.html new file mode 100644 index 000000000..61838f58d --- /dev/null +++ b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/templates/user_dash.html @@ -0,0 +1,86 @@

<!DOCTYPE html>
<html>

<head>
    <title>Home</title>
    <style>
        /* Negative tweets are highlighted with a grey background */
        .negative {
            background-color: grey;
        }
    </style>
</head>

<body>
    <h1>Welcome @{{ data['account_info_json']['screen_name'] }} !!</h1>

    {% for i in range(0, data['topics']|length) %}
    <h3>Topic {{ i }} is {{ data['topics'][i] }}</h3>
    {% endfor %}

    <h2>Classified tweets</h2>

    <table>
        <tr>
            <th>TWEET</th>
            <th>TOPIC</th>
            <th>SENTIMENT</th>
        </tr>
        {% for i in range(0, data['all_tweets']|length) %}
        <tr>
            {% if data['sentiment'][i]=='Negative' %}
            <td class="negative">{{ data['all_tweets'][i] }}</td>
            {% else %}
            <td>{{ data['all_tweets'][i] }}</td>
            {% endif %}

            {% if data['sentiment'][i]=='Negative' %}
            <td class="negative">{{ data['classified_tweets'][i] }}</td>
            {% else %}
            <td>{{ data['classified_tweets'][i] }}</td>
            {% endif %}

            {% if data['sentiment'][i]=='Negative' %}
            <td class="negative">{{ data['sentiment'][i] }}</td>
            {% else %}
            <td>{{ data['sentiment'][i] }}</td>
            {% endif %}
        </tr>
        {% endfor %}
    </table>
</body>

</html>
\ No newline at end of file

diff --git a/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/text_cleaning.py b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/text_cleaning.py new file mode 100644 index 000000000..8dc15d978 --- /dev/null +++ b/Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/text_cleaning.py @@ -0,0 +1,32 @@

import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# NLTK data needed for the stop word list and the tokenizer
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)


def remove_stop_words(sentence):
    # Define stop words
    stop_words = set(stopwords.words('english'))

    # Tokenize the sentence
    word_tokens = word_tokenize(sentence)

    # Remove stop words from the tokens (case-insensitive comparison)
    filtered_sentence = [w for w in word_tokens if w.lower() not in stop_words]

    sentence = ' '.join(filtered_sentence)
    return sentence


def clean_tweets(tweets):

    # Remove URLs
    no_url_tweets = [re.sub(r"http\S+", "", t) for t in tweets]

    # Remove stop words
    cleaned_tweets = [remove_stop_words(t) for t in no_url_tweets]

    # Remove punctuation signs
    cleaned_tweets = [re.sub(r'[^\w\s]', '', t) for t in cleaned_tweets]

    return cleaned_tweets
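As a quick way to exercise the cleaning and sentiment modules outside the Flask/OAuth flow, a small driver script along these lines could be dropped into the project folder. It is a sketch, not part of the app: the file name and the sample tweets are hypothetical, and it assumes the dependencies in requirements.txt are installed. The topic step (`btm_model.categorize`) is left out because it additionally needs the cloned biterm folder.
```
# sanity_check.py - hypothetical helper script, not part of the app
import text_cleaning
import sentiment

# Made-up tweets standing in for a real timeline
raw_tweets = [
    "Loving the sunny weather today! https://example.com/pic",
    "My flight got delayed again, terrible service.",
    "Reading about topic modeling and BTM this weekend.",
]

# Clean the tweets, then label each one with VADER
cleaned = text_cleaning.clean_tweets(raw_tweets)
labels = sentiment.get_sentiment(cleaned)

for original, label in zip(raw_tweets, labels):
    print(f"[{label}] {original}")
```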