Spaces:
Build error
Build error
File size: 4,508 Bytes
4292ffa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import time
import pprint
import csv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import csv
from youtube_comment_scraper_python import *
import pandas as pd
import plotly.express as px
import re
import streamlit as st
st.title('Youtube Channel Analysis')
st.write('Youtube WebScrap')

# ------------------------------ CHANNEL DATA ------------------------------
# Ask for the channel URL *before* starting a browser: when the input is
# empty the script stops here, so launching Chrome earlier would start a
# browser process that is never used and never closed.
# Example input: https://www.youtube.com/@YasoobKhalid/videos
url = st.text_input('Paste the Youtube Channel Link', "")
if not url:
    st.warning('Please input a Link.')
    st.stop()
st.success('Thank you for inputting a link.')

# Derive a display name for the channel. Prefer the "@handle" part of the
# URL; otherwise fall back to the first capitalised word (the original
# heuristic), and finally to the raw URL so a URL without either form no
# longer raises IndexError.
handle_match = re.search(r"@([\w.-]+)", url)
if handle_match:
    out = handle_match.group(1)
else:
    inp = re.findall(r"[A-Z]\w+", url)
    out = inp[0] if inp else url
st.write('Getting Data from', out, 'channel')

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# Give the page time to render its elements before a lookup fails. This
# timeout is sticky: it also applies to later element lookups on `driver`.
driver.implicitly_wait(10)
driver.get(url)

# Channel header details as shown on the page (raw display text).
handle = driver.find_element(By.XPATH, '//yt-formatted-string[@id="channel-handle"]').text
subscriber_count = driver.find_element(By.XPATH, '//yt-formatted-string[@id="subscriber-count"]').text
# Seconds to pause after each scroll so newly loaded videos can appear.
WAIT_IN_SECONDS = 5

# Keep scrolling to the bottom of the page until the document height stops
# growing, i.e. no further content was appended after the last scroll.
last_height = driver.execute_script("return document.documentElement.scrollHeight")
reached_bottom = False
while not reached_bottom:
    driver.execute_script("window.scrollTo(0, arguments[0]);", last_height)
    time.sleep(WAIT_IN_SECONDS)
    current_height = driver.execute_script("return document.documentElement.scrollHeight")
    # An unchanged height means the previous scroll loaded nothing new.
    reached_bottom = current_height == last_height
    last_height = current_height
# Locate the per-video elements on the fully scrolled page.
thumbnails = driver.find_elements(By.XPATH, '//a[@id="thumbnail"]/yt-image/img')
views = driver.find_elements(By.XPATH, '//div[@id="metadata-line"]/span[1]')
titles = driver.find_elements(By.ID, "video-title")
links = driver.find_elements(By.ID, "video-title-link")

# Copy the element data into plain dicts, one per video. zip() stops at the
# shortest list, so a video missing any of the four elements truncates the
# result rather than raising.
videos = [
    {
        'title': title.text,
        'views': view.text,
        'thumbnail': thumb.get_attribute('src'),
        'link': link.get_attribute('href'),
    }
    for title, view, thumb, link in zip(titles, views, thumbnails, links)
]

# Everything needed has been copied out of the page, so close the browser
# now; otherwise each run of the script leaves a Chrome process behind.
driver.quit()

print(videos)
from pathlib import Path

# Nothing scraped: stop with a message instead of failing on videos[0].
if not videos:
    st.warning('No videos were found for this link.')
    st.stop()

# Make sure the target directory exists before opening the file for writing.
output_path = Path('output/people.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)

# Write one CSV row per video; the first record's keys give the header.
to_csv = videos
keys = to_csv[0].keys()
with open(output_path, 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(to_csv)

# Load the file back into a DataFrame and show it in the app.
df = pd.read_csv(output_path)
st.dataframe(df)
# Slider whose selected value is echoed back to the user.
count = st.slider('Select Lower Video Count', min_value=0, max_value=607, value=100)
st.write("You selected", count, 'Videos')

# Bar chart of the "views" column against the "title" column.
fig = px.bar(df, x="title", y="views", height=600)
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
)
# fig.update_yaxes(tickvals=['10k', '22k', '29k', '56k'])

# Render the same figure twice, once per tab: first with the Streamlit theme
# (the default), then with Plotly's native theme (theme=None).
streamlit_tab, plotly_tab = st.tabs(["Streamlit theme (default)", "Plotly native theme"])
for tab, theme in ((streamlit_tab, "streamlit"), (plotly_tab, None)):
    with tab:
        st.plotly_chart(fig, theme=theme, use_container_width=True)
# ----------------------------------------------------------------------------COMMENTS------------------------------------------------------------------------------
# url = input('Enter Youtube Video Url- ')
# youtube.open(url)
# youtube.keypress("pagedown")
# data = []
# currentpagesource=youtube.get_page_source()
# lastpagesource=''
# while(True):
# if(lastpagesource==currentpagesource):
# break
# lastpagesource=currentpagesource
# response=youtube.video_comments()
# for c in response['body']:
# data.append(c)
# youtube.scroll()
# currentpagesource=youtube.get_page_source()
# df = pd.DataFrame(data)
# df = df.replace('\n',' ', regex=True)
# df = df[['Comment', 'Likes']].drop_duplicates(keep="first")
# # df = df[['Likes']].drop_duplicates(keep="first")
# df.to_csv('output/data.csv',index=False)
# df.head()