Spaces:

podsni
/

Twitter-Analysis-scrap

Runtime error

App Files Files Community

Twitter-Analysis-scrap / app.py

podsnigame

update

c70e931 about 2 years ago

raw

history blame contribute delete

3.36 kB

	import streamlit as st
	import snscrape.modules.twitter as sntwitter
	import pandas as pd
	import plotly.express as px
	# import base64
	import pybase64


	# Page chrome: browser-tab title first (it has to precede the other
	# Streamlit calls in this script), then the visible heading.
	st.set_page_config(page_title="Scraping Twitter")
	st.title(body='Scraping Twitter')

	# User inputs: the free-text search query and the maximum number of tweets
	# to collect for it.
	query = st.text_input(label='Enter a search query:', value='data science')
	num_tweets = st.number_input(
	    label='Number of tweets to scrape:',
	    min_value=1,
	    max_value=1000000,
	    step=1,
	)

	# Scrape tweets and store data in a dataframe


	def scrape_tweets(query, num_tweets, lang='id'):
	    """Collect up to ``num_tweets`` search results into a DataFrame.

	    Args:
	        query: Search text passed to ``sntwitter.TwitterSearchScraper``.
	        num_tweets: Maximum number of tweets to collect. Values below 1
	            yield an empty frame.
	        lang: Language code appended to the query as ``lang:<code>``.
	            Defaults to ``'id'``, the filter this app has always applied;
	            pass an empty string or ``None`` to search without it.

	    Returns:
	        A ``pandas.DataFrame`` with the columns ``Tweet Id``, ``Datetime``,
	        ``Text``, ``Username``, ``Followers``, ``URL`` and ``User Id``, one
	        row per tweet in the order the scraper yielded them.
	    """
	    from itertools import islice

	    search = f'{query} lang:{lang}' if lang else query
	    # number_input can hand back a float; islice needs a non-negative int.
	    limit = max(int(num_tweets), 0)

	    # islice stops as soon as `limit` items were produced, so the scraper is
	    # never asked for one tweet more than needed (an enumerate/break loop
	    # only notices the limit after fetching the next item).
	    tweets = islice(sntwitter.TwitterSearchScraper(search).get_items(), limit)

	    # NOTE(review): some snscrape releases expose the tweet text as
	    # `rawContent` instead of `content` - confirm against the pinned version.
	    rows = [
	        [tweet.id, tweet.date, tweet.content, tweet.user.username,
	         tweet.user.followersCount, tweet.url, tweet.user.id]
	        for tweet in tweets
	    ]
	    return pd.DataFrame(rows, columns=[
	        'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL', 'User Id'])


	def _download_link(payload, mime, filename, label):
	    """Return an HTML anchor that downloads the text *payload* as *filename*.

	    The text is embedded as a base64 ``data:`` URI. The file name is derived
	    from the user's search query and the anchor is rendered with
	    ``unsafe_allow_html=True``, so the name is HTML-escaped to keep quotes or
	    angle brackets in the query from breaking out of the attribute.
	    """
	    import html

	    b64 = pybase64.b64encode(payload.encode()).decode()
	    safe_name = html.escape(filename, quote=True)
	    return f'<a href="data:{mime};base64,{b64}" download="{safe_name}">{label}</a>'


	def _render_results(tweets_df, scraped_query):
	    """Render the table, charts, sidebar drill-down and download links.

	    Args:
	        tweets_df: Frame produced by ``scrape_tweets``.
	        scraped_query: The query text the frame was scraped for; used only
	            to build the download file names.
	    """
	    # An empty frame has no datetime column to take `.dt` from, so bail out
	    # with a message instead of crashing further down.
	    if tweets_df.empty:
	        st.warning('No tweets found for this query.')
	        return

	    # Display data
	    st.write(tweets_df)

	    # Work on a copy with the calendar date added so the frame kept in
	    # session state is not modified on every rerun.
	    tweets_df = tweets_df.assign(Date=tweets_df['Datetime'].dt.date)

	    # Line plot of tweet count over time
	    tweets_by_date = tweets_df.groupby(['Date'])['Tweet Id'].count().reset_index()
	    st.plotly_chart(px.line(tweets_by_date, x='Date', y='Tweet Id'))

	    # Scatter plot of followers vs tweet id
	    st.plotly_chart(px.scatter(tweets_df, x='Followers', y='Tweet Id'))

	    # Username selection and interaction count
	    st.sidebar.title('Username and Interaction Count')
	    selected_username = st.sidebar.selectbox(
	        'Select a username:', options=tweets_df['Username'].unique())
	    user_rows = tweets_df[tweets_df['Username'] == selected_username]
	    st.sidebar.write(
	        f'Interactions with @{selected_username}: {user_rows.shape[0]}')

	    # Interaction count by username, most active first
	    interactions_by_user = (
	        tweets_df.groupby(['Username'])['Tweet Id'].count()
	        .reset_index()
	        .sort_values(by=['Tweet Id'], ascending=False)
	    )
	    st.plotly_chart(px.bar(interactions_by_user, x='Username', y='Tweet Id'))

	    # Interaction count with selected username over time
	    user_by_date = user_rows.groupby(['Date'])['Tweet Id'].count().reset_index()
	    st.plotly_chart(px.line(user_by_date, x='Date', y='Tweet Id'))

	    # Tweets involving selected username
	    st.write(f'Tweets involving @{selected_username}:')
	    st.write(user_rows)

	    # Download links: the full table as CSV, and the tweet texts as plain
	    # text separated by blank lines.
	    csv_link = _download_link(
	        tweets_df.to_csv(index=False), 'file/csv',
	        f'tweets_{scraped_query.replace(" ", "_")}.csv', 'Download CSV')
	    st.markdown(csv_link, unsafe_allow_html=True)

	    txt_link = _download_link(
	        "\n\n".join(tweets_df['Text'].tolist()), 'file/txt',
	        f"{scraped_query}_tweets.txt", 'Download TXT')
	    st.markdown(txt_link, unsafe_allow_html=True)


	# Streamlit re-runs the whole script on every widget interaction, and
	# st.button() is True only for the run triggered by the click. The scraped
	# frame is therefore kept in session state; otherwise changing the sidebar
	# selection would rerun the script with the button False and discard
	# everything that was rendered.
	if st.button('Scrape Tweets'):
	    st.session_state['tweets_df'] = scrape_tweets(query, num_tweets)
	    # Remember which query produced the data so file names stay correct
	    # even if the text box is edited afterwards.
	    st.session_state['tweets_query'] = query
	    st.success('Scraping done!')

	if 'tweets_df' in st.session_state:
	    _render_results(st.session_state['tweets_df'],
	                    st.session_state['tweets_query'])