kajalag committed on
Commit
7571eef
·
1 Parent(s): 2891753

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from transformers import AutoTokenizer
4
+ from transformers import AutoModelForSequenceClassification
5
+ import warnings
6
+ warnings.filterwarnings("ignore")
7
+ import nltk
8
+ nltk.download('all')
9
+ import matplotlib.pyplot as plt
10
+ import helper
11
+ import preprocessor
12
+ from mtranslate import translate
13
+ import pandas as pd
14
+ import os
15
+ from gtts import gTTS
16
+ import base64
17
+ import torch
18
+ import seaborn as sns
19
def _render_figure(fig):
    """Draw ``fig`` on the Streamlit page and then release it.

    Figures created through ``plt.subplots()`` stay registered with pyplot
    until they are explicitly closed; closing each one right after it has been
    rendered keeps figures from accumulating over repeated Streamlit reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


# NOTE(review): the block nesting below was reconstructed from what each
# statement needs in scope, because the captured diff view carried no
# indentation. Confirm it against the committed app.py.
st.sidebar.title("Whatsapp Chat analyzer")

uploaded_file = st.sidebar.file_uploader("Choose a file")

if uploaded_file is not None:
    # Decode the uploaded bytes and hand the text to the project-local
    # preprocessor, which returns the DataFrame used by every section below.
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df_new = preprocessor.preprocess(data)

    # Sidebar selector: sorted unique values of the 'users' column, with a
    # whole-group entry placed first.
    user_list = df_new['users'].unique().tolist()
    user_list.sort()
    user_list.insert(0, "Group analysis")
    selected_user = st.sidebar.selectbox("show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        # ---- Headline counters -------------------------------------------
        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
        st.title("Top Statistics")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Links Shared")
            st.title(num_links)

        # ---- Monthly / daily timelines -----------------------------------
        st.title("Timeline")
        col1, col2 = st.columns(2)

        with col1:
            st.header("Monthly ")
            timeline = helper.monthly_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(timeline['time'], timeline['message'])
            plt.xticks(rotation='vertical')
            _render_figure(fig)
        with col2:
            st.title("Daily")
            daily_timeline = helper.Daily_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(daily_timeline['Date'], daily_timeline['message'], color='black')
            plt.xticks(rotation='vertical')
            _render_figure(fig)

        # ---- Activity by weekday and by month ----------------------------
        st.title("Activity Map")
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(
                busy_day.index,
                busy_day.values,
                color=('violet', 'indigo', 'blue', 'green', 'yellow', 'orange', 'red'),
            )
            plt.xticks(rotation='vertical')
            _render_figure(fig)
        with col2:
            st.header("Most busy Month")
            busy_month = helper.month_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(
                busy_month.index,
                busy_month.values,
                color=('indigo', 'blue', 'green', 'red'),
            )
            plt.xticks(rotation='vertical')
            _render_figure(fig)

        # ---- Weekly heatmap ----------------------------------------------
        st.title("Weekly Activity HeatMap")
        Activity_heatmap = helper.activity_heatmap(selected_user, df_new)
        fig, ax = plt.subplots()
        # Pass the axes explicitly instead of relying on pyplot's notion of
        # the "current" axes.
        sns.heatmap(
            Activity_heatmap,
            cmap='RdBu',
            linewidths=1,
            linecolor='black',
            ax=ax,
        )
        _render_figure(fig)

        # ---- Per-user ranking (only for the whole-group view) ------------
        if selected_user == "Group analysis":
            st.title("Most busy user")
            x, new_df = helper.most_busy_users(df_new)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)

            with col1:
                ax.bar(
                    x.index,
                    x.values,
                    color=('blue', 'red', 'pink', 'orange', 'green'),
                )
                plt.xticks(rotation='vertical')
                _render_figure(fig)
            with col2:
                st.dataframe(new_df)

        # ---- Word lists grouped by sentiment -----------------------------
        st.title("Chat Sentiment Analysis")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.header("Positive")
            pos_words = helper.pos_words(selected_user, df_new)
            st.dataframe(pos_words)
        with col2:
            st.header("Negative")
            neg_words = helper.neg_words(selected_user, df_new)
            st.dataframe(neg_words)
        with col3:
            st.header("Neutral")
            neu_words = helper.neu_words(selected_user, df_new)
            st.dataframe(neu_words)

        # ---- Word cloud --------------------------------------------------
        st.title("Word cloud")
        df_wc = helper.word_cloud(selected_user, df_new)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        plt.axis('off')
        _render_figure(fig)

        # ---- Most common words -------------------------------------------
        st.title("Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df_new)
        fig, ax = plt.subplots()
        # Column 0 supplies the bar labels and column 1 the bar lengths.
        ax.barh(most_common_df[0], most_common_df[1])
        _render_figure(fig)
        st.dataframe(
            most_common_df.style.set_properties(
                **{"background-color": "black", "color": "lawngreen"}
            )
        )

        # ---- Emoji table -------------------------------------------------
        emoji_df = helper.emoji_helper(selected_user, df_new)
        st.title("Emoji Analysis")
        st.dataframe(
            emoji_df.style.set_properties(
                **{"background-color": "black", "color": "lawngreen"}
            )
        )
137
+
138
+
139
st.title("Sentiment Analysis")


# NOTE(review): recent Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_resource``; switch once the deployed Streamlit version is known.
@st.cache(allow_output_mutation=True)
def get_model():
    """Load the tokenizer and classification model used for sentiment scoring.

    The result is cached by Streamlit so the pretrained weights are not
    reloaded on every script rerun. ``allow_output_mutation=True`` tells the
    cache not to hash the returned objects to detect mutation.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the
        ``cardiffnlp/twitter-roberta-base-sentiment`` checkpoint.
    """
    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model
146
+
147
+
148
# Readable names for the generic label ids this checkpoint reports.
# NOTE(review): mapping taken from the cardiffnlp/twitter-roberta-base-sentiment
# model card (0 = negative, 1 = neutral, 2 = positive) -- confirm before release.
_SENTIMENT_NAMES = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive",
}

tokenizer, model = get_model()

user_input = st.text_area('Enter Text to Analyze')
button = st.button("Analyze")

if user_input and button:
    # Build the pipeline from the cached tokenizer/model so the prediction
    # shown comes from the checkpoint loaded by get_model(). Previously that
    # model was run and its output thrown away, while a separate default
    # pipeline was re-created on every rerun and used for the displayed result.
    sent_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
    # Same truncation limits the original tokenizer call used.
    predictions = sent_pipeline(user_input, truncation=True, max_length=512)
    for prediction in predictions:
        raw_label = prediction["label"]
        # Unknown labels pass through unchanged.
        prediction["label"] = _SENTIMENT_NAMES.get(raw_label, raw_label)
    st.write("Prediction: ", predictions)

# NOTE(review): as a plain Python variable this assignment has no effect on
# Streamlit; it looks like the `showWarningOnDirectExecution` config option,
# which belongs in Streamlit's configuration rather than in the script.
showWarningOnDirectExecution = False