This repository was archived by the owner on Dec 22, 2023. It is now read-only.

Added twitter topic modeling and sentiment analysis #614

Merged
merged 2 commits on Oct 28, 2020
@@ -0,0 +1,77 @@
# Twitter Topic Modeling and Sentiment Analysis

A Flask web app with Twitter OAuth for analysing the topics and sentiment of a user's tweets.

The user logs in with their Twitter account credentials.

The application retrieves the user's tweets and applies an unsupervised NLP clustering technique known as topic modeling to group them into topics. The model used here is the Biterm Topic Model (BTM), which is particularly well suited to short texts such as tweets.
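
A biterm is simply an unordered pair of distinct words that co-occur in the same short text; BTM models the corpus as a mixture of topics over these word pairs rather than over whole documents, which is why it copes well with tweet-length input. A toy illustration of biterm extraction (the library's actual implementation differs in detail):

```python
from itertools import combinations

def extract_biterms(text):
    # A biterm is an unordered pair of distinct words from the same short document
    words = text.lower().split()
    return list(combinations(words, 2))

print(extract_biterms("cats chase mice"))
# → [('cats', 'chase'), ('cats', 'mice'), ('chase', 'mice')]
```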

The application also performs sentiment analysis using the VADER sentiment analysis module.

## Prerequisites

* Clone the [BTM repo](https://github.com/markoarnauto/biterm) and copy-paste the biterm folder from there in the current folder.

* Create a twitter developer's account.

* Create an app from the [developer's home page](https://developer.twitter.com/en/apps)

* Fill in the app details as shown; you can change the other fields, but keep the callback URL the same.
```
Callback url : http://127.0.0.1:5000/login/twitter/authorized
```

![twitter-app-details](images/twitter-app-details.JPG)

* From this page go to "Keys and tokens", copy your keys, and paste them into the `app.py` file.
```
twitter_blueprint = make_twitter_blueprint(
    api_key="<your_api_key>", api_secret="<your_api_secret>")
```

The Website URL field can be any valid URL.

* Install dependencies
```
pip install -r requirements.txt
```


## Usage
* In the main directory, run the following command
```
python app.py
```
* A URL will be printed in the console; open it in your browser. In most cases it is:
```
http://127.0.0.1:5000/
```

* The app is now running!

* Log in using your Twitter credentials

![login](images/app-twitter-oauth.JPG)

* Enter your profile display name/ID.

## Screenshots

* Final user dashboard

![user dashboard](images/twitter-app-dashboard.JPG)

1. Your username/display name

2. Topics clustered by BTM

3. Each tweet classified into one of the 3 topics.

4. Vader Sentiment analysis for each tweet.

Each negative tweet is highlighted with a grey background.

## Author

Priya Mane
75 changes: 75 additions & 0 deletions Scripts/API/Twitter-topic-modeling-and-sentiment-analysis/app.py
@@ -0,0 +1,75 @@
from flask import Flask, redirect, url_for, render_template
from flask_dance.contrib.twitter import make_twitter_blueprint, twitter
import btm_model
import text_cleaning
import sentiment

app = Flask(__name__)
app.config['SECRET_KEY'] = "youareawesomethiscanbeanything"

twitter_blueprint = make_twitter_blueprint(
    api_key="", api_secret="")

app.register_blueprint(twitter_blueprint, url_prefix='/login')


@app.route('/')
def index():
    # Home page
    # If the user is not authorized, redirect to the Twitter login page
    if not twitter.authorized:
        return redirect(url_for('twitter.login'))
    return redirect(url_for('twitter_login'))


@app.route('/twitter')
def twitter_login():
    # If the user is not authorized, redirect to the Twitter login page
    if not twitter.authorized:
        return redirect(url_for('twitter.login'))
    # If the user is authorized, retrieve their account details
    account_info = twitter.get('account/settings.json')
    # ...and their tweets
    user_tweets = twitter.get("statuses/user_timeline.json")

    # If the account information was retrieved successfully, analyse and display it
    if account_info.ok:
        # Convert the retrieved responses to JSON
        user_tweets_json = user_tweets.json()
        account_info_json = account_info.json()

        # Get the tweet text from the returned objects
        all_tweets = []
        for tweet in user_tweets_json:
            all_tweets.append(tweet['text'])

        # Text cleaning for tweets
        all_tweets_cleaned = text_cleaning.clean_tweets(all_tweets)

        # BTM model for topic modeling results
        classified_tweets, topics = btm_model.categorize(all_tweets_cleaned)

        # Sentiment analysis
        tweet_sentiment = sentiment.get_sentiment(all_tweets_cleaned)

        # Prepare the data rendered on the user-dashboard template
        data = {
            "all_tweets": all_tweets,
            "account_info_json": account_info_json,
            "classified_tweets": classified_tweets,
            "topics": topics,
            "sentiment": tweet_sentiment
        }

        # Render the template with the user's data
        return render_template('user_dash.html', data=data)

    # If the account info could not be retrieved, return an error message
    return '<h2>Error</h2>'


if __name__ == '__main__':
    app.run(debug=True)
@@ -0,0 +1,42 @@
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

from biterm.biterm.btm import oBTM
from biterm.biterm.utility import vec_to_biterms, topic_summuary


def categorize(tweets_list, number_of_topics=3):
    # Vectorize the tweets
    vec = CountVectorizer(stop_words='english')
    X = vec.fit_transform(tweets_list).toarray()

    # Get the vocabulary (get_feature_names_out requires scikit-learn >= 1.0)
    vocab = np.array(vec.get_feature_names_out())

    # Get the biterms (word pairs co-occurring within a tweet)
    biterms = vec_to_biterms(X)

    # Create the BTM
    btm = oBTM(num_topics=number_of_topics, V=vocab)

    # Train the online BTM on chunks of 100 tweets
    for i in range(0, len(biterms), 100):
        biterms_chunk = biterms[i:i + 100]
        btm.fit(biterms_chunk, iterations=50)
    topics = btm.transform(biterms)

    # Topic coherence summary (`topic_summuary` is the library's own spelling)
    res = topic_summuary(btm.phi_wz.T, X, vocab, 6)
    topics_top_words = res['top_words']

    # Assign each tweet to its most probable topic
    topic_classification = []
    for i in range(len(tweets_list)):
        topic_classification.append(topics[i].argmax())

    return topic_classification, topics_top_words
@@ -0,0 +1,7 @@
flask
requests
Flask-Dance
numpy
scikit-learn
vaderSentiment
nltk
@@ -0,0 +1,24 @@
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


def get_sentiment(tweets):
    # Define the analyser object
    analyser = SentimentIntensityAnalyzer()
    sentiment = []

    for t in tweets:
        # Get the polarity scores for each tweet:
        # sentiment_dict holds pos, neg, neu scores and a compound score (-1 to 1)
        sentiment_dict = analyser.polarity_scores(t)

        # Use the compound score to determine the tweet's sentiment
        if sentiment_dict['compound'] >= 0.05:
            sentiment.append("Positive")
        elif sentiment_dict['compound'] <= -0.05:
            sentiment.append("Negative")
        else:
            sentiment.append("Neutral")

    return sentiment
@@ -0,0 +1,86 @@
<!DOCTYPE html>
<html>
<head>
    <title>Home</title>

    <style>
        table {
            font-family: arial, sans-serif;
            border-collapse: collapse;
            width: 100%;
        }

        td, th {
            border: 1px solid #dddddd;
            text-align: left;
            padding: 8px;
        }

        .negative {
            background-color: #dddddd;
        }

        .topic {
            font-family: arial, sans-serif;
        }
    </style>
</head>
<body>
    <!-- Jinja templating: render the data object sent from the backend -->
    <h2 style="text-align: center;">Welcome @{{ data['account_info_json']['screen_name'] }} !!</h2>

    <br>
    {% for i in range(data['topics'] | length) %}
    <p class="topic">Topic {{ i }} is {{ data['topics'][i] }}</p>
    {% endfor %}

    <br>

    <h2>Classified tweets</h2>
    <table>
        <tr>
            <th>TWEET</th>
            <th>TOPIC</th>
            <th>SENTIMENT</th>
        </tr>
        {% for i in range(data['all_tweets'] | length) %}
        {# Highlight the cells of negative tweets with a grey background #}
        {% set cell_class = 'negative' if data['sentiment'][i] == 'Negative' else '' %}
        <tr>
            <td class="{{ cell_class }}">{{ data['all_tweets'][i] }}</td>
            <td class="{{ cell_class }}">{{ data['classified_tweets'][i] }}</td>
            <td class="{{ cell_class }}">{{ data['sentiment'][i] }}</td>
        </tr>
        {% endfor %}
    </table>
</body>
</html>
@@ -0,0 +1,32 @@
import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the required NLTK data on first use (no-op if already present)
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)


def remove_stop_words(sentence):
    # Define stop words
    stop_words = set(stopwords.words('english'))

    # Tokenize the sentence
    word_tokens = word_tokenize(sentence)

    # Remove stop words from the tokens (NLTK's stop word list is lowercase)
    filtered_sentence = [w for w in word_tokens if w.lower() not in stop_words]

    return ' '.join(filtered_sentence)


def clean_tweets(tweets):
    # Remove URLs
    no_url_tweets = [re.sub(r"http\S+", "", t) for t in tweets]

    # Remove stop words
    cleaned = [remove_stop_words(t) for t in no_url_tweets]

    # Remove punctuation
    cleaned = [re.sub(r'[^\w\s]', '', t) for t in cleaned]

    return cleaned