File size: 4,050 Bytes
87010b2
 
 
 
 
 
 
14c27e2
f493631
 
 
 
0833807
 
 
87010b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14c27e2
87010b2
 
 
 
 
14c27e2
 
 
 
 
87010b2
14c27e2
87010b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14c27e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import tweepy
from plotly.subplots import make_subplots
from transformers import pipeline

# SECURITY NOTE(review): these Twitter API credentials are committed in plain
# text. They should be treated as compromised, rotated, and removed from the
# source. Each value can be supplied through the environment variable named
# below; the embedded literals are kept only as backward-compatible fallbacks
# until the deployment provides those variables.
consumer_key = os.environ.get(
    "TWITTER_CONSUMER_KEY", "sHz78Xj5Dl41cqfzEHVoRcaKo"
)
consumer_secret = os.environ.get(
    "TWITTER_CONSUMER_SECRET",
    "3y5caZfu91nmB2MNH7mDSu5Cgf5qaVRpMfbDoCPW4dU7E46k03",
)
access_key = os.environ.get(
    "TWITTER_ACCESS_KEY",
    "1116912581434695680-x359MscPSdqEcJzoIlg4jMsCZRdyNX",
)
access_secret = os.environ.get(
    "TWITTER_ACCESS_SECRET",
    "wEsALFUava2TnYXWnuacrzSK4eiYfJUFLBRWPqGuMRnTz",
)

# Build the authenticated client that get_tweets() uses for timeline requests.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)


def get_tweets(username, count):
    """Collect tweets from a user's timeline into parallel lists.

    The timeline request asks for extended-mode tweets and excludes replies
    and retweets; at most ``count`` items are pulled from the cursor.

    Args:
        username: Screen name of the account to read.
        count: Item limit passed to the cursor.

    Returns:
        A dict with four equally long lists: ``"tweets"`` (lower-cased full
        text with newlines removed), ``"timestamps"`` (``str`` of each
        tweet's ``created_at``), ``"retweets"`` and ``"likes"``.
    """
    timeline = tweepy.Cursor(
        api.user_timeline,
        screen_name=username,
        tweet_mode="extended",
        exclude_replies=True,
        include_rts=False,
    )

    texts, created, retweet_counts, like_counts = [], [], [], []
    for status in timeline.items(count):
        # NOTE(review): newlines are dropped without inserting a space, so
        # words on adjacent lines end up joined together.
        texts.append(status.full_text.replace("\n", "").lower())
        created.append(str(status.created_at))
        retweet_counts.append(status.retweet_count)
        like_counts.append(status.favorite_count)

    return {
        "tweets": texts,
        "timestamps": created,
        "retweets": retweet_counts,
        "likes": like_counts,
    }


def get_sentiment(texts):
    """Classify ``texts`` with the module-level ``pipe`` and split the output.

    Each prediction returned by ``pipe`` is indexed by ``"label"`` and
    ``"score"``.

    Args:
        texts: The inputs handed to ``pipe`` in a single call.

    Returns:
        A dict with ``"labels"`` and ``"scores"``, each a list holding one
        entry per prediction, in prediction order.
    """
    predictions = pipe(texts)
    return {
        out_key: [prediction[field] for prediction in predictions]
        for out_key, field in (("labels", "label"), ("scores", "score"))
    }


def neutralise_sentiment(preds):
    """Relabel low-confidence predictions as ``"neutral"``, in place.

    Any entry whose score is below 0.5 gets the label ``"neutral"`` and its
    score replaced by ``1.0 - score``. Other entries are left untouched.

    Args:
        preds: Dict with parallel ``"labels"`` and ``"scores"`` lists; both
            lists are mutated.

    Returns:
        None.
    """
    labels = preds["labels"]
    scores = preds["scores"]
    # Only positions present in both lists are visited.
    for idx in range(min(len(labels), len(scores))):
        confidence = scores[idx]
        if confidence < 0.5:
            labels[idx] = "neutral"
            scores[idx] = 1.0 - confidence


def get_aggregation_period(df):
    """Choose a resampling period from the time span of ``df["timestamps"]``.

    Args:
        df: DataFrame with a ``"timestamps"`` column whose min and max can be
            subtracted to give a timedelta.

    Returns:
        ``"1D"`` when the span is under 30 days, ``"7D"`` when it is under
        365 days, and ``"30D"`` otherwise.
    """
    stamps = df["timestamps"]
    span = stamps.max() - stamps.min()
    # Thresholds are checked in ascending order; the first one the span fits
    # under decides the period.
    for upper_bound, period in (("30D", "1D"), ("365D", "7D")):
        if span < pd.to_timedelta(upper_bound):
            return period
    return "30D"


@st.cache(allow_output_mutation=True)
def load_model():
    """Build the emotion-classification pipeline.

    The function is wrapped in ``st.cache`` with ``allow_output_mutation=True``.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"sentiment-analysis"`` task with the
        ``bhadresh-savani/distilbert-base-uncased-emotion`` model.
    """
    return pipeline(
        task="sentiment-analysis",
        model="bhadresh-savani/distilbert-base-uncased-emotion",
    )


"""
# Twitter Emotion Analyser
"""


# Module-level classifier used by get_sentiment(); load_model() is wrapped in
# st.cache.
pipe = load_model()
# Sidebar inputs: the account to analyse (pre-filled with "huggingface") and
# the number of tweets to request, chosen from a fixed set of sizes.
twitter_handle = st.sidebar.text_input("Twitter handle:", "huggingface")
twitter_count = st.sidebar.selectbox("Number of tweets:", (10, 100, 500, 1000, 3200))


# Main flow: when the sidebar button is pressed, fetch the tweets, classify
# them, and render a two-panel figure plus a small sample table.
if st.sidebar.button("Get tweets!"):
    tweets = get_tweets(twitter_handle, twitter_count)

    if not tweets["tweets"]:
        # get_tweets excludes replies and retweets, so the result can be
        # empty; report it instead of failing in the steps below.
        st.warning(f"No tweets found for @{twitter_handle}.")
    else:
        preds = get_sentiment(tweets["tweets"])
        # Optional post-processing step, currently disabled:
        # neutralise_sentiment(preds)
        tweets.update(preds)

        # dataframe creation + preprocessing
        df = pd.DataFrame(tweets)
        df["timestamps"] = pd.to_datetime(df["timestamps"])

        # plots
        agg_period = get_aggregation_period(df)
        # Tweets per (period, label). The per-timestamp counts are summed
        # within each period so that tweets sharing a timestamp and label are
        # all counted; periods without tweets for a label yield 0.
        ts_sentiment = (
            df.groupby(["timestamps", "labels"])
            .count()["likes"]
            .unstack()
            .resample(agg_period)
            .sum()
            .astype(int)
            .stack()
            .reset_index()
        )
        ts_sentiment.columns = ["timestamp", "label", "count"]

        fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.15)

        # Left panel: one stacked line per emotion label.
        # TODO: check that stacking makes sense!
        for label in ts_sentiment["label"].unique():
            # Select this label's rows once and reuse them for both axes.
            label_rows = ts_sentiment[ts_sentiment["label"] == label]
            fig.add_trace(
                go.Scatter(
                    x=label_rows["timestamp"],
                    y=label_rows["count"],
                    mode="lines",
                    name=label,
                    stackgroup="one",
                    hoverinfo="x+y",
                ),
                row=1,
                col=1,
            )

        # Right panel: mean number of likes per emotion label.
        likes_per_label = df.groupby("labels")["likes"].mean().reset_index()

        fig.add_trace(
            go.Bar(
                x=likes_per_label["labels"],
                y=likes_per_label["likes"],
                showlegend=False,
                marker_color=px.colors.qualitative.Plotly,
                opacity=0.6,
            ),
            row=1,
            col=2,
        )

        fig.update_yaxes(title_text="Number of Tweets", row=1, col=1)
        fig.update_yaxes(title_text="Number of Likes", row=1, col=2)
        fig.update_layout(height=350, width=750)

        st.plotly_chart(fig)

        # tweet sample: never request more rows than exist, because
        # DataFrame.sample raises when n exceeds the frame length.
        sample_size = min(5, len(df))
        st.markdown(df.sample(n=sample_size).to_markdown())