Spaces:
Runtime error
Runtime error
Commit
·
54be9ee
1
Parent(s):
c16d6d7
Add application file
Browse files- data/tweets_data_science.csv +25 -0
- requirements.txt +5 -0
- test.py +77 -0
data/tweets_data_science.csv
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Tweet Id,Datetime,Text,Username,Followers,URL,User Id,Date
|
2 |
+
1628984431733248001,2023-02-24 05:05:28+00:00,"Boss: Susah lho cari kerja skrg
|
3 |
+
|
4 |
+
Me: mba tau kan kalo kerjaan kita ini spesialis, kebutuhan market banyak orangnya dikit karena skrg org2 lebih tertarik ke data science dibanding actuarial. Hhehe
|
5 |
+
|
6 |
+
Boss: itu aja alasannya? Nanti saya diskusiin dulu sama HR ya
|
7 |
+
|
8 |
+
Me: iya mba hehe.",rollersk8erboi,4773,https://twitter.com/rollersk8erboi/status/1628984431733248001,898242912693100544,2023-02-24
|
9 |
+
1628977101415096320,2023-02-24 04:36:20+00:00,"Gue bisa susun konten mengenai tips belajar suatu subject (tentu contohnya Data Science) menggunakan Chat GPT. Mulai dari prompt yg disarankan, memilih pertanyaan yang tepat, jadi temen belajar, dst.
|
10 |
+
|
11 |
+
Kalau tertarik, boleh.",MikaelDewabrata,4436,https://twitter.com/MikaelDewabrata/status/1628977101415096320,1457701454,2023-02-24
|
12 |
+
1628969012393840640,2023-02-24 04:04:11+00:00,"Python merupakan salah satu bahasa pemrograman yang bisa kamu pelajari untuk memulai karir sebagai Data Scientist loh. Yuk kenalan dengan apa itu Python dan mengapa Python cocok bagi pemula di artikel berikut ini!
|
13 |
+
https://t.co/BSkWQhm0Ql
|
14 |
+
|
15 |
+
#DataScience #DataScientist #DataEngineer",DQLab_id,1281,https://twitter.com/DQLab_id/status/1628969012393840640,1328251677334388743,2023-02-24
|
16 |
+
1628965342897901568,2023-02-24 03:49:37+00:00,"Kayaknya lbh bermanfaat buat publik jika yg pny skill di data science utk grab data LHKPN dan buat pemeringkatan, dan identifikasi atas pejabat2 yg mengalami kenaikan harta atau jumlah harta tak wajar.
|
17 |
+
|
18 |
+
Yg model di bawah mah kasih ke netijen aja …",elisa_jkt,67460,https://twitter.com/elisa_jkt/status/1628965342897901568,44556194,2023-02-24
|
19 |
+
1628953977705361408,2023-02-24 03:04:27+00:00,HIRING: Data Modeller / India - Bengaluru https://t.co/nReLF9VdmA #AI #MachineLearning #DataJobs #MLjobs #bigdata #DataScience #AIjobs #AIcareers #hiringnow #IndiaBengaluru #Agile #Architecture #Banking #BigData #BigQuery #Creditrisk #Dataquality #Datawarehouse #ELT #ETL #SQL,ai_jobsNET,1656,https://twitter.com/ai_jobsNET/status/1628953977705361408,1014944290236174336,2023-02-24
|
20 |
+
1628926334335619072,2023-02-24 01:14:36+00:00,@markasnyajoki Bisa banget nih kak bibi bantu. Spes soshum dan it (Data science) yaa.. boleh tanya2 dulu ke wa di bioo https://t.co/FtCePxD2nV,kickassworker,131,https://twitter.com/kickassworker/status/1628926334335619072,1349279519253819395,2023-02-24
|
21 |
+
1628924215863943168,2023-02-24 01:06:11+00:00,"@collegemenfess Statistika, data science, data engineering wkwkkw",ailamaika,148,https://twitter.com/ailamaika/status/1628924215863943168,1118993887903621120,2023-02-24
|
22 |
+
1628920136026689536,2023-02-24 00:49:58+00:00,"Yuk yang butuh dibantu skripsi/thesis/tugas akhir/harian terkait data mining (scrapping/crawling/preprocessing/preparing/analysis/evaluation) bisa banget kami bantu.
|
23 |
+
Spesialis data science, bisa pakai tools google colab(python), rapidminer, orange, vosviewer (bibliometrik). https://t.co/K6j3AwgOJX",thetaskmasterid,36,https://twitter.com/thetaskmasterid/status/1628920136026689536,1549995678692040704,2023-02-24
|
24 |
+
1628909481689075713,2023-02-24 00:07:38+00:00,"HIRING: Data Software Engineer, Finance Engineering / Bengaluru https://t.co/G3k8C9ZxtR #AI #MachineLearning #DataJobs #MLjobs #bigdata #DataScience #AIjobs #AIcareers #hiringnow #Bengaluru #BusinessIntelligence #Datapipelines #Datawarehouse #Engineering #Finance #Flink #OLAP",ai_jobsNET,1656,https://twitter.com/ai_jobsNET/status/1628909481689075713,1014944290236174336,2023-02-24
|
25 |
+
1628864076188188679,2023-02-23 21:07:13+00:00,"HIRING: Data Analyst - Marketing & Media / Chiasso, Switzerland https://t.co/BkkLF5Dgx1 #AI #MachineLearning #DataJobs #AIcareers #MLjobs #bigdata #DataScience #AIjobs #hiringnow #Chiasso #Switzerland #BusinessIntelligence #Datapipelines #Ecommerce #Engineering #Excel #Python",remote_ai_jobs,824,https://twitter.com/remote_ai_jobs/status/1628864076188188679,1202221488,2023-02-23
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
snscrape
|
3 |
+
plotly
|
4 |
+
seaborn
|
5 |
+
pandas
|
test.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import snscrape.modules.twitter as sntwitter
|
3 |
+
import pandas as pd
|
4 |
+
import plotly.express as px
|
5 |
+
import os
|
6 |
+
|
7 |
+
# --- Page setup -------------------------------------------------------------
st.set_page_config(page_title="Scraping Twitter")
st.title('Scraping Twitter')

# --- User inputs: free-text search query and how many tweets to fetch -------
query = st.text_input('Enter a search query:', 'data science')
num_tweets = st.number_input(
    'Number of tweets to scrape:',
    min_value=1,
    max_value=1000000,
    step=1,
)
|
15 |
+
|
16 |
+
# Scrape tweets and store data in a dataframe
|
17 |
+
|
18 |
+
|
19 |
+
def scrape_tweets(query, num_tweets, lang='id'):
    """Scrape up to *num_tweets* tweets matching *query* via snscrape.

    Parameters
    ----------
    query : str
        Free-text Twitter search query.
    num_tweets : int
        Maximum number of tweets to collect; the scraper's (potentially
        endless) result stream is cut off after this many items.
    lang : str, optional
        Twitter language filter appended to the query. Defaults to
        ``'id'`` (Indonesian), preserving the previous hard-coded
        behaviour for existing callers.

    Returns
    -------
    pandas.DataFrame
        One row per tweet with columns: 'Tweet Id', 'Datetime', 'Text',
        'Username', 'Followers', 'URL', 'User Id'. Empty (but with the
        same columns) when nothing matched.
    """
    columns = ['Tweet Id', 'Datetime', 'Text', 'Username',
               'Followers', 'URL', 'User Id']
    rows = []
    scraper = sntwitter.TwitterSearchScraper(f'{query} lang:{lang}')
    for i, tweet in enumerate(scraper.get_items()):
        # get_items() is a generator over live search results; stop once
        # the requested count has been collected.
        if i >= num_tweets:
            break
        rows.append([tweet.id, tweet.date, tweet.content,
                     tweet.user.username, tweet.user.followersCount,
                     tweet.url, tweet.user.id])
    return pd.DataFrame(rows, columns=columns)
|
29 |
+
|
30 |
+
|
31 |
+
if st.button('Scrape Tweets'):
    # Run the scrape with the user's query/count from the widgets above.
    tweets_df = scrape_tweets(query, num_tweets)
    st.success('Scraping done!')

    if tweets_df.empty:
        # Guard: on an empty frame the 'Datetime' column is object-dtype,
        # so the `.dt` accessor below would raise; the plots and the
        # username selectbox would also be meaningless. Bail out cleanly.
        st.warning('No tweets found for this query.')
        st.stop()

    # Raw results table
    st.write(tweets_df)

    # Line plot of tweet count over time.
    # Coerce explicitly so `.dt` works regardless of the inferred dtype.
    tweets_df['Date'] = pd.to_datetime(tweets_df['Datetime']).dt.date
    tweets_by_date = tweets_df.groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
    st.plotly_chart(fig)

    # Scatter plot of followers vs tweet count
    fig = px.scatter(tweets_df, x='Followers', y='Tweet Id')
    st.plotly_chart(fig)

    # Sidebar: pick a username and show how many scraped tweets involve it.
    st.sidebar.title('Username and Interaction Count')
    selected_username = st.sidebar.selectbox(
        'Select a username:', options=tweets_df['Username'].unique())
    st.sidebar.write(
        f'Interactions with @{selected_username}: {tweets_df[tweets_df["Username"] == selected_username].shape[0]}')

    # Bar chart: tweet count per username, busiest accounts first.
    interactions_by_user = tweets_df.groupby(['Username'])['Tweet Id'].count(
    ).reset_index().sort_values(by=['Tweet Id'], ascending=False)
    fig = px.bar(interactions_by_user, x='Username', y='Tweet Id')
    st.plotly_chart(fig)

    # Line plot: the selected user's tweet count over time.
    tweets_by_date = tweets_df[tweets_df['Username'] == selected_username].groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
    st.plotly_chart(fig)

    # Full rows for the selected user.
    st.write(f'Tweets involving @{selected_username}:')
    st.write(tweets_df[tweets_df['Username'] == selected_username])

    # Persist the scrape as CSV under data/, named after the query.
    if not os.path.exists("data"):
        os.mkdir("data")
    file_name = f"data/tweets_{query.replace(' ', '_')}.csv"
    tweets_df.to_csv(file_name, index=False)
    st.write(f"Tweets saved as {file_name}")
|