Spaces:
Runtime error
Runtime error
Commit
·
4594580
1
Parent(s):
54be9ee
Add application file
Browse files
app.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import snscrape.modules.twitter as sntwitter
|
3 |
+
import pandas as pd
|
4 |
+
import plotly.express as px
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Page setup: browser-tab title and the on-page heading.
st.set_page_config(page_title="Scraping Twitter")
st.title('Scraping Twitter')

# Search controls: a free-text query (pre-filled with 'data science') and the
# number of tweets to fetch, bounded to 1..1000000 in steps of 1.
query = st.text_input(label='Enter a search query:', value='data science')
num_tweets = st.number_input(
    label='Number of tweets to scrape:',
    min_value=1,
    max_value=1000000,
    step=1,
)
|
15 |
+
|
16 |
+
# Scrape tweets and store data in a dataframe
|
17 |
+
|
18 |
+
|
19 |
+
def scrape_tweets(query, num_tweets):
    """Scrape up to ``num_tweets`` tweets matching ``query`` into a DataFrame.

    The search string sent to the scraper is ``query`` with ``' lang:id'``
    appended (a language filter; presumably Indonesian -- confirm against
    the Twitter search-operator documentation).

    Args:
        query: Search text entered by the user.
        num_tweets: Maximum number of tweets to collect. Converted with
            ``int()``; values below zero are treated as zero.

    Returns:
        A pandas DataFrame with one row per tweet and the columns
        'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL'
        and 'User Id'. The frame has zero rows when nothing is returned.
    """
    from itertools import islice

    limit = max(int(num_tweets), 0)
    scraper = sntwitter.TwitterSearchScraper(query + ' lang:id')

    # islice stops after exactly ``limit`` items. The earlier
    # enumerate()/break loop requested one additional tweet from the
    # scraper generator before it noticed the limit had been reached.
    rows = [
        [
            tweet.id,
            tweet.date,
            tweet.content,
            tweet.user.username,
            tweet.user.followersCount,
            tweet.url,
            tweet.user.id,
        ]
        for tweet in islice(scraper.get_items(), limit)
    ]

    return pd.DataFrame(
        rows,
        columns=[
            'Tweet Id', 'Datetime', 'Text', 'Username',
            'Followers', 'URL', 'User Id',
        ],
    )
|
29 |
+
|
30 |
+
|
31 |
+
# Scrape only on the click itself, but keep the result in session state.
# Streamlit reruns the whole script on every widget interaction and
# st.button() is True for a single run only, so without this the results
# (and the sidebar selectbox) would disappear as soon as a username is picked.
just_scraped = st.button('Scrape Tweets')
if just_scraped:
    st.session_state['tweets_df'] = scrape_tweets(query, num_tweets)
    st.success('Scraping done!')

tweets_df = st.session_state.get('tweets_df')

if tweets_df is not None and tweets_df.empty:
    # A frame with no rows has object-dtype columns, so the ``.dt`` accessor
    # used below would raise; report the empty result instead of crashing.
    st.warning('No tweets found for this query.')
elif tweets_df is not None:
    # Display data
    st.write(tweets_df)

    # Add the calendar date on a copy so the frame stored in session state
    # is not mutated between reruns.
    tweets_df = tweets_df.assign(Date=tweets_df['Datetime'].dt.date)

    # Line plot of tweet count over time
    tweets_by_date = tweets_df.groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
    st.plotly_chart(fig)

    # Scatter plot of followers vs tweet id
    # NOTE(review): the original comment said "tweet count", but the y axis
    # is the raw 'Tweet Id' column -- confirm which one is intended.
    fig = px.scatter(tweets_df, x='Followers', y='Tweet Id')
    st.plotly_chart(fig)

    # Username selection and interaction count
    st.sidebar.title('Username and Interaction Count')
    selected_username = st.sidebar.selectbox(
        'Select a username:', options=tweets_df['Username'].unique())
    # Rows authored by the selected user; reused by every per-user view below.
    user_tweets = tweets_df[tweets_df['Username'] == selected_username]
    st.sidebar.write(
        f'Interactions with @{selected_username}: {user_tweets.shape[0]}')

    # Interaction count by username, most active first
    interactions_by_user = (
        tweets_df.groupby(['Username'])['Tweet Id']
        .count()
        .reset_index()
        .sort_values(by=['Tweet Id'], ascending=False)
    )
    fig = px.bar(interactions_by_user, x='Username', y='Tweet Id')
    st.plotly_chart(fig)

    # Interaction count with selected username over time
    tweets_by_date = user_tweets.groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
    st.plotly_chart(fig)

    # Tweets involving selected username
    st.write(f'Tweets involving @{selected_username}:')
    st.write(user_tweets)

    if just_scraped:
        # Save tweets as CSV file in data folder (once per scrape, not on
        # every rerun). exist_ok avoids the check-then-create race.
        os.makedirs("data", exist_ok=True)
        # The query is free user input: keep only letters, digits, '-' and
        # '_' so path separators or '..' cannot break or escape data/.
        safe_query = ''.join(
            ch if ch.isalnum() or ch in '-_' else '_' for ch in query)
        file_name = f"data/tweets_{safe_query}.csv"
        tweets_df.to_csv(file_name, index=False)
        st.write(f"Tweets saved as {file_name}")
|