Spaces:
Runtime error
Runtime error
File size: 2,910 Bytes
54be9ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import itertools
import os
import re

import pandas as pd
import plotly.express as px
import snscrape.modules.twitter as sntwitter
import streamlit as st
# Page chrome: browser-tab title and the on-page heading.
st.set_page_config(page_title="Scraping Twitter")
st.title(body='Scraping Twitter')

# User inputs: the search text and the maximum number of tweets to collect.
query = st.text_input(label='Enter a search query:', value='data science')
num_tweets = st.number_input(
    label='Number of tweets to scrape:',
    min_value=1,
    max_value=1_000_000,
    step=1,
)
# Scrape tweets and store data in a dataframe
def scrape_tweets(query: str, num_tweets: int, lang: str = 'id') -> pd.DataFrame:
    """Scrape up to ``num_tweets`` tweets matching ``query`` into a DataFrame.

    Args:
        query: Search text handed to snscrape's ``TwitterSearchScraper``.
        num_tweets: Maximum number of tweets to collect. Values below 1
            yield an empty result.
        lang: Language code appended to the query as a ``lang:`` search
            filter. Defaults to ``'id'``, the filter this function has
            always applied; pass an empty string to search without it.

    Returns:
        A DataFrame with one row per tweet and the columns 'Tweet Id',
        'Datetime', 'Text', 'Username', 'Followers', 'URL' and 'User Id'.
        The frame has those columns but no rows when nothing was scraped.
    """
    search = f'{query} lang:{lang}' if lang else query
    # Coerce to a non-negative int: islice rejects floats and negatives.
    limit = max(int(num_tweets), 0)
    items = sntwitter.TwitterSearchScraper(search).get_items()
    # islice stops after exactly `limit` items, so the scraper is never asked
    # for one more item than is kept (an enumerate/break loop would be).
    rows = [
        [tweet.id, tweet.date, tweet.content, tweet.user.username,
         tweet.user.followersCount, tweet.url, tweet.user.id]
        for tweet in itertools.islice(items, limit)
    ]
    return pd.DataFrame(rows, columns=[
        'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL', 'User Id'])
# Scrape on button press, then render the result table, the charts and the
# per-user breakdown, and finally save the data as CSV.
# NOTE(review): Streamlit reruns the script on every widget interaction and
# st.button is presumably True only for the run triggered by the click, so
# changing the sidebar selectbox below likely discards this output. Keeping
# the scraped frame in st.session_state would fix that -- confirm against the
# Streamlit version in use.
if st.button('Scrape Tweets'):
    tweets_df = scrape_tweets(query, num_tweets)

    if tweets_df.empty:
        # An empty frame has an object-dtype 'Datetime' column, so the `.dt`
        # accessor used below would raise; there is nothing to plot or save.
        st.warning('No tweets found for this query.')
    else:
        st.success('Scraping done!')

        # Display data
        st.write(tweets_df)

        # Line plot of tweet count over time
        tweets_df['Date'] = tweets_df['Datetime'].dt.date
        tweets_by_date = tweets_df.groupby(
            ['Date'])['Tweet Id'].count().reset_index()
        fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
        st.plotly_chart(fig)

        # Scatter plot of author follower count against tweet id
        fig = px.scatter(tweets_df, x='Followers', y='Tweet Id')
        st.plotly_chart(fig)

        # Username selection and interaction count
        st.sidebar.title('Username and Interaction Count')
        selected_username = st.sidebar.selectbox(
            'Select a username:', options=tweets_df['Username'].unique())
        # Filter once; the same subset feeds the count, the chart and the table.
        user_tweets = tweets_df[tweets_df['Username'] == selected_username]
        st.sidebar.write(
            f'Interactions with @{selected_username}: {len(user_tweets)}')

        # Interaction count by username
        interactions_by_user = tweets_df.groupby(['Username'])['Tweet Id'].count(
        ).reset_index().sort_values(by=['Tweet Id'], ascending=False)
        fig = px.bar(interactions_by_user, x='Username', y='Tweet Id')
        st.plotly_chart(fig)

        # Interaction count with selected username over time
        user_tweets_by_date = user_tweets.groupby(
            ['Date'])['Tweet Id'].count().reset_index()
        fig = px.line(user_tweets_by_date, x='Date', y='Tweet Id')
        st.plotly_chart(fig)

        # Tweets involving selected username
        st.write(f'Tweets involving @{selected_username}:')
        st.write(user_tweets)

        # Save tweets as CSV file in data folder
        os.makedirs("data", exist_ok=True)
        # The query is free-form user input: map every character that is not
        # a word character or '-' to '_' so separators, colons and the like
        # cannot break or redirect the output path. Spaces still become '_'.
        safe_query = re.sub(r'[^\w\-]', '_', query)
        file_name = f"data/tweets_{safe_query}.csv"
        tweets_df.to_csv(file_name, index=False)
        st.write(f"Tweets saved as {file_name}")
|