# Page-capture residue (not part of the program): "Spaces: Runtime error"
import streamlit as st | |
import snscrape.modules.twitter as sntwitter | |
import pandas as pd | |
import plotly.express as px | |
import os | |
# Page chrome: the browser-tab title and the on-page heading.
st.set_page_config(page_title="Scraping Twitter")
st.title('Scraping Twitter')

# User inputs: the search text and an upper bound on how many tweets to fetch.
query = st.text_input(label='Enter a search query:', value='data science')
num_tweets = st.number_input(
    label='Number of tweets to scrape:',
    min_value=1,
    max_value=1000000,
    step=1,
)
# Scrape tweets and store data in a dataframe
def scrape_tweets(query, num_tweets, lang='id'):
    """Collect up to ``num_tweets`` search results into a DataFrame.

    Args:
        query: Search text handed to ``sntwitter.TwitterSearchScraper``.
        num_tweets: Maximum number of tweets to collect. The value is
            converted with ``int()``; anything below 1 yields an empty frame.
        lang: Language code appended to the query as `` lang:<code>``.
            Defaults to ``'id'``, the filter this script has always applied.
            Pass ``None`` or ``''`` to search without a language filter.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Tweet Id``, ``Datetime``,
        ``Text``, ``Username``, ``Followers``, ``URL`` and ``User Id``, one
        row per tweet in the order the scraper produced them.
    """
    columns = [
        'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL',
        'User Id',
    ]
    search = f'{query} lang:{lang}' if lang else query
    limit = max(int(num_tweets), 0)
    items = sntwitter.TwitterSearchScraper(search).get_items()

    # ``range`` comes first on purpose: zip stops as soon as the range is
    # exhausted, so no tweet beyond the limit is ever requested from the
    # scraper (an enumerate/break loop fetches one extra item).
    rows = [
        [
            tweet.id,
            tweet.date,
            # NOTE(review): newer snscrape releases appear to prefer
            # ``rawContent`` over ``content`` - confirm against the
            # installed version.
            tweet.content,
            tweet.user.username,
            tweet.user.followersCount,
            tweet.url,
            tweet.user.id,
        ]
        for _, tweet in zip(range(limit), items)
    ]
    return pd.DataFrame(rows, columns=columns)
def _add_date_column(tweets_df):
    """Return a copy of ``tweets_df`` with a ``Date`` column taken from ``Datetime``."""
    return tweets_df.assign(Date=tweets_df['Datetime'].dt.date)


def _save_tweets_csv(tweets_df, query):
    """Write ``tweets_df`` to ``data/tweets_<query>.csv`` and return that path."""
    # Keep only characters that are safe in a file name; everything else
    # (spaces, slashes, colons, ...) becomes an underscore.
    stem = ''.join(
        ch if ch.isalnum() or ch in '-_' else '_' for ch in query)
    os.makedirs("data", exist_ok=True)
    file_name = f"data/tweets_{stem}.csv"
    tweets_df.to_csv(file_name, index=False)
    return file_name


def _render_dashboard(tweets_df):
    """Draw the table, charts and sidebar widgets for a non-empty tweet frame."""
    # Display data
    st.write(tweets_df)
    dated_df = _add_date_column(tweets_df)

    # Line plot of tweet count over time
    tweets_by_date = dated_df.groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    st.plotly_chart(px.line(tweets_by_date, x='Date', y='Tweet Id'))

    # Scatter plot of followers vs tweet count. The count is aggregated per
    # user; plotting the raw 'Tweet Id' column would chart the numeric ids.
    user_stats = dated_df.groupby('Username', as_index=False).agg(
        {'Followers': 'max', 'Tweet Id': 'count'})
    st.plotly_chart(px.scatter(
        user_stats, x='Followers', y='Tweet Id', hover_name='Username',
        labels={'Tweet Id': 'Tweet count'}))

    # Username selection and interaction count
    st.sidebar.title('Username and Interaction Count')
    selected_username = st.sidebar.selectbox(
        'Select a username:', options=dated_df['Username'].unique())
    selected_tweets = dated_df[dated_df['Username'] == selected_username]
    st.sidebar.write(
        f'Interactions with @{selected_username}: {selected_tweets.shape[0]}')

    # Interaction count by username
    interactions_by_user = user_stats.sort_values(
        by=['Tweet Id'], ascending=False)
    st.plotly_chart(px.bar(interactions_by_user, x='Username', y='Tweet Id'))

    # Interaction count with selected username over time
    selected_by_date = selected_tweets.groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    st.plotly_chart(px.line(selected_by_date, x='Date', y='Tweet Id'))

    # Tweets involving selected username
    st.write(f'Tweets involving @{selected_username}:')
    st.write(selected_tweets)


# Scrape on click and keep the result in session state. Streamlit reruns the
# whole script on every widget interaction and st.button is only True for the
# run triggered by the click, so rendering inside the `if` would make the
# dashboard vanish as soon as the sidebar selection changes.
if st.button('Scrape Tweets'):
    scraped_df = scrape_tweets(query, num_tweets)
    st.session_state['tweets_df'] = scraped_df
    st.success('Scraping done!')
    if not scraped_df.empty:
        # Save tweets as CSV file in data folder (once per scrape, not on
        # every rerun).
        file_name = _save_tweets_csv(_add_date_column(scraped_df), query)
        st.write(f"Tweets saved as {file_name}")

if 'tweets_df' in st.session_state:
    tweets_df = st.session_state['tweets_df']
    if tweets_df.empty:
        # An empty frame has no datetime values, so the `.dt` accessor and
        # the charts below would fail; tell the user instead.
        st.warning('No tweets were found for this query.')
    else:
        _render_dashboard(tweets_df)