"""Streamlit app that lists the videos of a YouTube channel.

The user pastes a channel "videos" URL; the page is opened in Chrome through
Selenium, scrolled until no more content loads, and the title, view count,
thumbnail and link of every video found are written to a CSV file and shown
as a table.
"""
# NOTE: the original import order is kept on purpose. The wildcard import
# below may rebind names imported before it, so regrouping the imports could
# change which object a name refers to.
import time
import pprint
import csv
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from youtube_comment_scraper_python import *
import pandas as pd
import plotly.express as px
import re
import streamlit as st
from pathlib import Path

# Chrome binary used when it exists; otherwise Selenium locates Chrome itself.
CHROME_BINARY = '/usr/bin/google-chrome'
# Destination of the scraped data.
OUTPUT_CSV = r'C:/Users/ashok/OneDrive/Desktop/WebScrap/Youtube/output/people.csv'
# Pause after each scroll so lazily loaded videos have time to appear.
WAIT_IN_SECONDS = 5


def _channel_label(url: str) -> str:
    """Return a short display name for the channel behind *url*.

    Prefers the ``@handle`` part of the URL, then the first capitalised word
    (the original heuristic), and finally the URL itself so that a URL without
    either never raises.
    """
    handle = re.search(r"@([\w.-]+)", url)
    if handle:
        return handle.group(1)
    word = re.search(r"[A-Z]\w+", url)
    return word.group(0) if word else url


def _create_driver():
    """Start Chrome with a chromedriver fetched by ``webdriver_manager``."""
    options = webdriver.ChromeOptions()
    if Path(CHROME_BINARY).exists():
        options.binary_location = CHROME_BINARY
    # The driver executable is passed through Service; the browser binary is
    # configured separately via binary_location above.
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=options)


def _scroll_to_end(driver) -> None:
    """Scroll down repeatedly until the document height stops growing."""
    height_script = "return document.documentElement.scrollHeight"
    last_height = driver.execute_script(height_script)
    while True:
        driver.execute_script("window.scrollTo(0, arguments[0]);", last_height)
        time.sleep(WAIT_IN_SECONDS)
        new_height = driver.execute_script(height_script)
        if new_height == last_height:
            break
        last_height = new_height


def _collect_videos(driver) -> list:
    """Return one dict per video with its title, views, thumbnail and link.

    Text and attributes are read here because the elements become unusable
    once the driver is closed.
    """
    thumbnails = driver.find_elements(By.XPATH, '//a[@id="thumbnail"]/yt-image/img')
    views = driver.find_elements(By.XPATH, '//div[@id="metadata-line"]/span[1]')
    titles = driver.find_elements(By.ID, "video-title")
    links = driver.find_elements(By.ID, "video-title-link")
    # zip stops at the shortest list.
    # NOTE(review): if one selector matches fewer elements than the others,
    # rows may be dropped or paired with the wrong video - confirm on a live page.
    return [
        {
            'title': title.text,
            'views': view.text,
            'thumbnail': thumb.get_attribute('src'),
            'link': link.get_attribute('href'),
        }
        for title, view, thumb, link in zip(titles, views, thumbnails, links)
    ]


def _save_csv(videos: list, path: str) -> None:
    """Write *videos* (a non-empty list of dicts) to *path* as UTF-8 CSV."""
    with open(path, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.DictWriter(output_file, list(videos[0]))
        writer.writeheader()
        writer.writerows(videos)


def main() -> None:
    """Run the app: ask for a channel URL, scrape it, save and show the result."""
    st.title('Youtube WebScrap⛏️')

    url = st.text_input('Paste the Youtube Channel Link', "").strip()
    if not url:
        st.warning('Please input a Link.')
        st.stop()
    st.success('Thank you for inputting a link.')
    st.write('Getting Data from', _channel_label(url), 'channel')

    # The browser is started only once a URL is available and is always
    # closed, so script reruns do not leave Chrome processes behind.
    driver = _create_driver()
    try:
        driver.get(url)
        _scroll_to_end(driver)
        videos = _collect_videos(driver)
    finally:
        driver.quit()

    print(videos)
    if not videos:
        st.warning('No videos were found on this page.')
        st.stop()

    try:
        _save_csv(videos, OUTPUT_CSV)
    except OSError as exc:
        # The table below is built from memory, so a bad path is not fatal.
        st.warning(f'Could not write {OUTPUT_CSV}: {exc}')

    st.dataframe(pd.DataFrame(videos))


if __name__ == "__main__":
    main()