Spaces:

moizmoizmoizmoiz
/

MovieRecommender

Sleeping

Moiz committed on Nov 23, 2024

Commit

db0358a

1 Parent(s): ca528b9

update

Files changed (2) hide show

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

imbd-scrape250.py DELETED Viewed

@@ -1,34 +0,0 @@
-from bs4 import BeautifulSoup
-import re
-import csv
-# Path to the downloaded HTML file
-html_file_path = "/Users/moizpro/Desktop/MoviesRecommender/MovieRecommender/imdb_top250.html"  # Replace with your file's path
-# Open and parse the HTML file
-with open(html_file_path, "r", encoding="utf-8") as file:
-    soup = BeautifulSoup(file, "html.parser")
-# Find all movie links
-movie_links = soup.find_all("a", href=re.compile(r"/title/tt\d+/"))
-# Extract movie codes using regex
-movie_codes = []
-for link in movie_links:
-    href = link["href"]
-    match = re.search(r"/title/(tt\d+)/", href)
-    if match:
-        movie_codes.append(match.group(1))
-# Remove duplicates
-movie_codes = list(set(movie_codes))
-# Save movie codes to a CSV file
-csv_file = "imdb_movie_codes.csv"
-with open(csv_file, "w", newline="", encoding="utf-8") as file:
-    writer = csv.writer(file)
-    writer.writerow(["Movie Code"])
-    for code in movie_codes:
-        writer.writerow([code])
-print(f"Scraped {len(movie_codes)} movie codes and saved to '{csv_file}'.")