Spaces:
Build error
Build error
File size: 3,453 Bytes
4292ffa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import time
import pprint
import csv
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import csv
from youtube_comment_scraper_python import *
import pandas as pd
import plotly.express as px
import re
import streamlit as st
st.title('Youtube WebScrap⛏️')
# # ------------------------------------------------------------------------------CHANNEL DATA------------------------------------------------------------------------
chromedriver_autoinstaller.install()
# driver = webdriver.Chrome('/usr/bin/google-chrome')
chrome_path = '/usr/bin/google-chrome'
# Set up Chrome options if needed
chrome_options = webdriver.ChromeOptions()
# Create the WebDriver instance
chrome_options.binary_location = chrome_path
driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
# driver = webdriver.Chrome()
url = st.text_input('Paste the Youtube Channel Link',"")
if not url:
st.warning('Please input a Link.')
st.stop()
st.success('Thank you for inputting a link.')
# url ='https://www.youtube.com/@YasoobKhalid/videos'
name = re.compile(r"[A-Z]\w+")
inp = name.findall(url)
out = inp[0]
st.write('Getting Data from', out, 'channel')
driver.get(url)
url = input('Enter Youtube Video Url- ')
driver.get(url)
# # "https://www.youtube.com/@YasoobKhalid/videos"
# channel_title = driver.find_element(By.XPATH, '//yt-formatted-string[contains(@class, "ytd-channel-name")]').text
handle = driver.find_element(By.XPATH, '//yt-formatted-string[@id="channel-handle"]').text
subscriber_count = driver.find_element(By.XPATH, '//yt-formatted-string[@id="subscriber-count"]').text
WAIT_IN_SECONDS = 5
last_height = driver.execute_script("return document.documentElement.scrollHeight")
while True:
# Scroll to the bottom of page
driver.execute_script("window.scrollTo(0, arguments[0]);", last_height)
# Wait for new videos to show up
time.sleep(WAIT_IN_SECONDS)
# Calculate new document height and compare it with last height
new_height = driver.execute_script("return document.documentElement.scrollHeight")
if new_height == last_height:
break
last_height = new_height
thumbnails = driver.find_elements(By.XPATH, '//a[@id="thumbnail"]/yt-image/img')
views = driver.find_elements(By.XPATH,'//div[@id="metadata-line"]/span[1]')
titles = driver.find_elements(By.ID, "video-title")
links = driver.find_elements(By.ID, "video-title-link")
# likes = driver.find_elements(By.ID, "video-title-link-likes")
videos = []
for title, view, thumb, link in zip(titles, views, thumbnails, links):
video_dict = {
'title': title.text,
'views': view.text,
# 'likes': likes.text,
'thumbnail': thumb.get_attribute('src'),
'link': link.get_attribute('href')
}
videos.append(video_dict)
print(videos)
to_csv = videos
keys = to_csv[0].keys()
with open(r'C:/Users/ashok/OneDrive/Desktop/WebScrap/Youtube/output/people.csv', 'w', newline='', encoding='utf-8') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(to_csv)
df = pd.read_csv(r'C:/Users/ashok/OneDrive/Desktop/WebScrap/Youtube/output/people.csv')
st.dataframe(df)
|