Spaces:
Runtime error
Runtime error
Lagstill
committed on
Commit
·
e6dc112
1
Parent(s):
c50cb6c
dashboard added
Browse files- dashboard.py +111 -0
dashboard.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard library.
import datetime as dt

# Third-party.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  # was imported twice in the original; deduplicated
import praw
import seaborn as sns
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
import os

# SECURITY: the original hard-coded the Reddit client secret, username and
# password directly in source (and they were committed publicly — rotate
# them!).  Read credentials from environment variables instead; set
# REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME and
# REDDIT_PASSWORD before launching the app.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET"),
    user_agent="Profile extractor",
    username=os.environ.get("REDDIT_USERNAME"),
    password=os.environ.get("REDDIT_PASSWORD"),
)
# Page title and short description shown at the top of the dashboard.
st.title('Just Reddit as it is 👀')

st.write('This is a simple web app to extract data from Reddit and analyze it.')

# Path to the pre-scraped subreddit dataset loaded by load_data() below.
DATA_URL = 'subreddit_data_v1.csv'
@st.cache
def load_data():
    """Read the subreddit CSV from DATA_URL and lowercase all column names.

    Cached by Streamlit so the file is only parsed once per session.
    """
    frame = pd.read_csv(DATA_URL)
    # Normalise headers so later column lookups ('subreddit', 'title', ...)
    # are case-insensitive with respect to the source file.
    frame.columns = [str(col).lower() for col in frame.columns]
    return frame
# Load the dataset, showing a transient status message while it loads.
data_load_state = st.text('Loading data...')
data = load_data()
data_load_state.text("Done! (using st.cache)")
# Optionally render the full raw DataFrame for inspection.
if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)
# Let the user pick a subreddit; this choice drives every section below.
subreddit = st.selectbox('Select a subreddit', data['subreddit'].unique())
st.subheader('Wordcloud of the most common words in the subreddit')

# Build one lowercase text blob from all post titles of the selected
# subreddit.  The original appended to a string inside a loop (quadratic)
# and lowercased tokens one-by-one with an index loop; a single join over
# lowercased titles is linear and equivalent, since str.split/join only
# normalises whitespace, which the word cloud ignores anyway.
titles = data[data['subreddit'] == subreddit]['title']
comment_words = " ".join(str(title).lower() for title in titles)

stopwords = set(STOPWORDS)

wordcloud = WordCloud(width=800, height=800,
                      background_color='white',
                      stopwords=stopwords,
                      min_font_size=10).generate(comment_words)

# Render the word cloud through matplotlib and hand the figure to Streamlit.
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
# Suppress the "pyplot global use" deprecation warning for bare st.pyplot().
st.set_option('deprecation.showPyplotGlobalUse', False)
st.pyplot()
# Filter once; the original recomputed data[data['subreddit'] == subreddit]
# for each of the four widgets below.
subreddit_data = data[data['subreddit'] == subreddit]

# Summary statistics of the numeric columns for the selected subreddit.
st.subheader('Statistics of the subreddit')
st.write(subreddit_data.describe())

# Posts per day: count of titles grouped by creation date.
st.subheader('Number of posts per day')
st.write(subreddit_data.groupby('created')['title'].count())

# Comments per day: sum of per-post comment counts grouped by creation date.
st.subheader('Number of comments per day')
st.write(subreddit_data.groupby('created')['num_comments'].sum())

# Bar chart of each post's score.
st.subheader('Score of the posts')
st.bar_chart(subreddit_data['score'])
+
|
98 |
+
|
99 |
+
|
100 |
+
# st.subheader('Number of pickups by hour')
|
101 |
+
# hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
|
102 |
+
# st.bar_chart(hist_values)
|
103 |
+
|
104 |
+
# # Some number in the range 0-23
|
105 |
+
# hour_to_filter = st.slider('hour', 0, 23, 17)
|
106 |
+
# filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
|
107 |
+
|
108 |
+
# st.subheader('Map of all pickups at %s:00' % hour_to_filter)
|
109 |
+
# st.map(filtered_data)
|
110 |
+
|
111 |
+
|