# MovieRecommender / imdb-scrape250.py
# Moiz
# added movie list scraped
# 1598421
from bs4 import BeautifulSoup
import re
import csv
# Location of the locally saved HTML page to scrape.
html_file_path = "/Users/moizpro/Desktop/MoviesRecommender/MovieRecommender/imdb_top250.html"  # Replace with the path to your own copy

# Read the saved page as UTF-8 text and hand the open handle to BeautifulSoup,
# which builds the parse tree using the "html.parser" backend.
with open(html_file_path, mode="r", encoding="utf-8") as html_source:
    soup = BeautifulSoup(html_source, "html.parser")
# One compiled pattern serves both purposes: selecting anchors whose href
# contains a title path such as "/title/tt0111161/", and capturing the
# "tt…" code out of that href.
title_href_pattern = re.compile(r"/title/(tt\d+)/")

# Find all anchors that link to a title page.
movie_links = soup.find_all("a", href=title_href_pattern)

# Collect the title code from every matching link, in document order.
movie_codes = []
for link in movie_links:
    match = title_href_pattern.search(link["href"])
    if match:
        movie_codes.append(match.group(1))

# Remove duplicates while keeping first-seen order. A plain set() would
# scramble the order in which codes appear on the page and, because string
# hashing is randomised per process, would give a different CSV on every run.
movie_codes = list(dict.fromkeys(movie_codes))
# Write the collected codes to a single-column CSV, header row first.
csv_file = "imdb_movie_codes.csv"
with open(csv_file, mode="w", newline="", encoding="utf-8") as out_handle:
    code_writer = csv.writer(out_handle)
    code_writer.writerow(["Movie Code"])
    # One row per code, each row containing only that code.
    code_writer.writerows([code] for code in movie_codes)

# Report how many codes were written and to which file.
print(f"Scraped {len(movie_codes)} movie codes and saved to '{csv_file}'.")