Spaces:

moizmoizmoizmoiz
/

MovieRecommender

Sleeping

Moiz committed on Nov 23, 2024

Commit

1598421

1 Parent(s): 1c16e20

added movie list scraped

Files changed (2) hide show

imbd-scrape250.py ADDED Viewed

+from bs4 import BeautifulSoup
+import re
+import csv
+# Path to the downloaded HTML file
+html_file_path = "/Users/moizpro/Desktop/MoviesRecommender/MovieRecommender/imdb_top250.html"  # Replace with your file's path
+# Open and parse the HTML file
+with open(html_file_path, "r", encoding="utf-8") as file:
+    soup = BeautifulSoup(file, "html.parser")
+# Find all movie links
+movie_links = soup.find_all("a", href=re.compile(r"/title/tt\d+/"))
+# Extract movie codes using regex
+movie_codes = []
+for link in movie_links:
+    href = link["href"]
+    match = re.search(r"/title/(tt\d+)/", href)
+    if match:
+        movie_codes.append(match.group(1))
+# Remove duplicates
+movie_codes = list(set(movie_codes))
+# Save movie codes to a CSV file
+csv_file = "imdb_movie_codes.csv"
+with open(csv_file, "w", newline="", encoding="utf-8") as file:
+    writer = csv.writer(file)
+    writer.writerow(["Movie Code"])
+    for code in movie_codes:
+        writer.writerow([code])
+print(f"Scraped {len(movie_codes)} movie codes and saved to '{csv_file}'.")

imdb-scrape250.py ADDED Viewed

+from bs4 import BeautifulSoup
+import re
+import csv
+# Path to the downloaded HTML file
+html_file_path = "/Users/moizpro/Desktop/MoviesRecommender/MovieRecommender/imdb_top250.html"  # Replace with your file's path
+# Open and parse the HTML file
+with open(html_file_path, "r", encoding="utf-8") as file:
+    soup = BeautifulSoup(file, "html.parser")
+# Find all movie links
+movie_links = soup.find_all("a", href=re.compile(r"/title/tt\d+/"))
+# Extract movie codes using regex
+movie_codes = []
+for link in movie_links:
+    href = link["href"]
+    match = re.search(r"/title/(tt\d+)/", href)
+    if match:
+        movie_codes.append(match.group(1))
+# Remove duplicates
+movie_codes = list(set(movie_codes))
+# Save movie codes to a CSV file
+csv_file = "imdb_movie_codes.csv"
+with open(csv_file, "w", newline="", encoding="utf-8") as file:
+    writer = csv.writer(file)
+    writer.writerow(["Movie Code"])
+    for code in movie_codes:
+        writer.writerow([code])
+print(f"Scraped {len(movie_codes)} movie codes and saved to '{csv_file}'.")