podsnigame committed on
Commit
4594580
·
1 Parent(s): 54be9ee

Add application file

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import snscrape.modules.twitter as sntwitter
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import os
6
+
7
# Page chrome: browser page title and the on-page heading.
st.set_page_config(page_title="Scraping Twitter")

st.title('Scraping Twitter')

# User inputs: the search query and how many tweets to fetch.
query = st.text_input(label='Enter a search query:', value='data science')
num_tweets = st.number_input(
    label='Number of tweets to scrape:',
    min_value=1,
    max_value=1000000,
    step=1,
)
15
+
16
+ # Scrape tweets and store data in a dataframe
17
+
18
+
19
def scrape_tweets(query, num_tweets, lang='id'):
    """Scrape tweets matching a search query into a DataFrame.

    Args:
        query: Search string handed to snscrape's Twitter search scraper.
        num_tweets: Maximum number of tweets to collect. Values below 1
            produce an empty frame.
        lang: Language code appended to the query as a ``lang:`` filter.
            Defaults to ``'id'``, the filter this function previously
            hard-coded. Pass an empty string or ``None`` to search without
            a language filter.

    Returns:
        A DataFrame with one row per tweet and the columns ``Tweet Id``,
        ``Datetime``, ``Text``, ``Username``, ``Followers``, ``URL`` and
        ``User Id``. The frame has those columns even when no tweet matched.
    """
    from itertools import islice

    search = f'{query} lang:{lang}' if lang else query
    limit = max(int(num_tweets), 0)

    # islice stops after exactly `limit` items. The enumerate/break loop it
    # replaces pulled one extra tweet from the scraper before breaking.
    tweets = islice(sntwitter.TwitterSearchScraper(search).get_items(), limit)

    rows = [
        [
            tweet.id,
            tweet.date,
            tweet.content,
            tweet.user.username,
            tweet.user.followersCount,
            tweet.url,
            tweet.user.id,
        ]
        for tweet in tweets
    ]
    return pd.DataFrame(rows, columns=[
        'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL', 'User Id'])
29
+
30
+
31
# Scrape on click and keep the result in session state. Streamlit reruns the
# whole script whenever a widget changes, and st.button is True only on the
# run triggered by the click. Rendering directly inside the button branch
# therefore made every chart disappear as soon as a username was picked in
# the sidebar.
if st.button('Scrape Tweets'):
    scraped_df = scrape_tweets(query, num_tweets)
    csv_path = None

    if not scraped_df.empty:
        # An empty frame has an object-dtype 'Datetime' column, on which the
        # .dt accessor raises; only derive the date when there is data.
        scraped_df['Date'] = scraped_df['Datetime'].dt.date

        # Save tweets as CSV file in data folder. The query is user input, so
        # anything that is not alphanumeric, '-' or '_' is replaced to keep
        # the file inside data/ (no path separators or '..' segments).
        os.makedirs("data", exist_ok=True)
        safe_query = ''.join(
            ch if ch.isalnum() or ch in '-_' else '_' for ch in query)
        csv_path = f"data/tweets_{safe_query}.csv"
        scraped_df.to_csv(csv_path, index=False)

    st.session_state['tweets_df'] = scraped_df
    st.session_state['tweets_csv_path'] = csv_path
    st.success('Scraping done!')

# Render from session state so the views survive widget-triggered reruns.
if 'tweets_df' in st.session_state:
    tweets_df = st.session_state['tweets_df']
    file_name = st.session_state['tweets_csv_path']

    if tweets_df.empty:
        st.warning('No tweets found for this query.')
    else:
        # Display data (without the derived 'Date' helper column)
        st.write(tweets_df.drop(columns=['Date']))

        # Line plot of tweet count over time
        tweets_by_date = tweets_df.groupby(
            ['Date'])['Tweet Id'].count().reset_index()
        fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
        st.plotly_chart(fig)

        # Scatter plot of followers vs tweet count
        fig = px.scatter(tweets_df, x='Followers', y='Tweet Id')
        st.plotly_chart(fig)

        # Username selection and interaction count
        st.sidebar.title('Username and Interaction Count')
        selected_username = st.sidebar.selectbox(
            'Select a username:', options=tweets_df['Username'].unique())
        user_tweets = tweets_df[tweets_df['Username'] == selected_username]
        st.sidebar.write(
            f'Interactions with @{selected_username}: {user_tweets.shape[0]}')

        # Interaction count by username
        interactions_by_user = tweets_df.groupby(['Username'])['Tweet Id'].count(
        ).reset_index().sort_values(by=['Tweet Id'], ascending=False)
        fig = px.bar(interactions_by_user, x='Username', y='Tweet Id')
        st.plotly_chart(fig)

        # Interaction count with selected username over time
        tweets_by_date = user_tweets.groupby(
            ['Date'])['Tweet Id'].count().reset_index()
        fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
        st.plotly_chart(fig)

        # Tweets involving selected username
        st.write(f'Tweets involving @{selected_username}:')
        st.write(user_tweets)

        st.write(f"Tweets saved as {file_name}")