Spaces:
Runtime error
Runtime error
Lagstill
committed on
Commit
·
e6dc112
1
Parent(s):
c50cb6c
dashboard added
Browse files- dashboard.py +111 -0
dashboard.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard library.
import datetime as dt

# Third-party.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  # was imported twice in the original; deduplicated
import praw
import seaborn as sns
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
import os

# SECURITY: the original hard-coded the Reddit client secret, username and
# password directly in source (and they were committed publicly — rotate
# them!).  Read credentials from environment variables instead; set
# REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME and
# REDDIT_PASSWORD before launching the app.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET"),
    user_agent="Profile extractor",
    username=os.environ.get("REDDIT_USERNAME"),
    password=os.environ.get("REDDIT_PASSWORD"),
)
# Page title and short description shown at the top of the dashboard.
st.title('Just Reddit as it is 👀')

st.write('This is a simple web app to extract data from Reddit and analyze it.')

# Path to the pre-scraped subreddit dataset loaded by load_data() below.
DATA_URL = 'subreddit_data_v1.csv'
@st.cache
def load_data():
    """Read the subreddit CSV from DATA_URL and lowercase all column names.

    Cached by Streamlit so the file is only parsed once per session.
    """
    frame = pd.read_csv(DATA_URL)
    # Normalise headers so later column lookups ('subreddit', 'title', ...)
    # are case-insensitive with respect to the source file.
    frame.columns = [str(col).lower() for col in frame.columns]
    return frame
# Load the dataset, showing a transient status message while it loads.
data_load_state = st.text('Loading data...')
data = load_data()
data_load_state.text("Done! (using st.cache)")
# Optionally render the full raw DataFrame for inspection.
if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)
# Let the user pick a subreddit; this choice drives every section below.
subreddit = st.selectbox('Select a subreddit', data['subreddit'].unique())
st.subheader('Wordcloud of the most common words in the subreddit')

# Build one lowercase text blob from all post titles of the selected
# subreddit.  The original appended to a string inside a loop (quadratic)
# and lowercased tokens one-by-one with an index loop; a single join over
# lowercased titles is linear and equivalent, since str.split/join only
# normalises whitespace, which the word cloud ignores anyway.
titles = data[data['subreddit'] == subreddit]['title']
comment_words = " ".join(str(title).lower() for title in titles)

stopwords = set(STOPWORDS)

wordcloud = WordCloud(width=800, height=800,
                      background_color='white',
                      stopwords=stopwords,
                      min_font_size=10).generate(comment_words)

# Render the word cloud through matplotlib and hand the figure to Streamlit.
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
# Suppress the "pyplot global use" deprecation warning for bare st.pyplot().
st.set_option('deprecation.showPyplotGlobalUse', False)
st.pyplot()
# Filter once; the original recomputed data[data['subreddit'] == subreddit]
# for each of the four widgets below.
subreddit_data = data[data['subreddit'] == subreddit]

# Summary statistics of the numeric columns for the selected subreddit.
st.subheader('Statistics of the subreddit')
st.write(subreddit_data.describe())

# Posts per day: count of titles grouped by creation date.
st.subheader('Number of posts per day')
st.write(subreddit_data.groupby('created')['title'].count())

# Comments per day: sum of per-post comment counts grouped by creation date.
st.subheader('Number of comments per day')
st.write(subreddit_data.groupby('created')['num_comments'].sum())

# Bar chart of each post's score.
st.subheader('Score of the posts')
st.bar_chart(subreddit_data['score'])
+
|
98 |
+
|
99 |
+
|
100 |
+
# st.subheader('Number of pickups by hour')
|
101 |
+
# hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
|
102 |
+
# st.bar_chart(hist_values)
|
103 |
+
|
104 |
+
# # Some number in the range 0-23
|
105 |
+
# hour_to_filter = st.slider('hour', 0, 23, 17)
|
106 |
+
# filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
|
107 |
+
|
108 |
+
# st.subheader('Map of all pickups at %s:00' % hour_to_filter)
|
109 |
+
# st.map(filtered_data)
|
110 |
+
|
111 |
+
|