# Gradio movie-recommender app: user-based collaborative filtering on the MovieLens "ml-latest-small" dataset.
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import gradio as gr
import zipfile
import random
# Sizes used by the demo: how many recommendations to return, and how many
# movies the (content-based) dropdown label advertises.
result_count = 21
input_count = 300

# Read the movie catalogue and the ratings table directly out of the
# MovieLens archive, without unpacking it to disk.
with zipfile.ZipFile('ml-latest-small.zip') as z:
    with z.open('ml-latest-small/movies.csv') as f:
        movies = pd.read_csv(f)
    with z.open('ml-latest-small/ratings.csv') as f:
        ratings = pd.read_csv(f)

# Users as rows, movies as columns, ratings as cell values; a missing
# rating is stored as 0.
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')
user_item_matrix = user_item_matrix.fillna(0)

# Same data in CSR form for the neighbour search.
user_item_matrix_sparse = csr_matrix(user_item_matrix.to_numpy())

# Brute-force cosine nearest-neighbour index over the user rows.
model_knn = NearestNeighbors(n_neighbors=20, metric='cosine', algorithm='brute', n_jobs=-1)
model_knn.fit(user_item_matrix_sparse)
# Function to get movie recommendations using collaborative filtering
def get_cf_recommendations(user_id, user_item_matrix=user_item_matrix, model_knn=model_knn, movies=movies):
    """Recommend movies rated by the users most similar to ``user_id``.

    The user's row of ``user_item_matrix`` is used to query ``model_knn``
    for the nearest users. Every movie that a neighbour has rated (value
    > 0) and that the target user has not is a candidate, scored with
    ``1 - distance`` of that neighbour.

    Each movie appears at most once in the result, carrying the best
    score among the neighbours who rated it.

    Args:
        user_id: Label of the target user in ``user_item_matrix.index``.
        user_item_matrix: DataFrame with users as rows, movie ids as
            columns, and 0 meaning "not rated".
        model_knn: Fitted neighbours model exposing ``kneighbors``;
            presumably fitted on the rows of ``user_item_matrix``.
        movies: DataFrame with ``movieId`` and ``title`` columns.

    Returns:
        A list of ``(title, score)`` tuples sorted by descending score,
        at most ``result_count`` long. Empty if ``user_id`` is unknown.
    """
    if user_id not in user_item_matrix.index:
        return []

    user_ratings = user_item_matrix.loc[user_id]
    user_vector = user_ratings.values.reshape(1, -1)

    # Asking for more neighbours than there are rows makes kneighbors raise.
    n_neighbors = min(result_count, len(user_item_matrix.index))
    distances, indices = model_knn.kneighbors(user_vector, n_neighbors=n_neighbors)
    similar_users = user_item_matrix.index[indices.flatten()]

    seen_mask = (user_ratings > 0).values

    # One id -> title mapping per call instead of a full scan of `movies`
    # for every candidate; the first title wins if an id is repeated.
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title'].to_dict()

    # Pair ids with distances directly: going through DataFrame.iterrows()
    # would coerce the integer user ids to float.
    best_score = {}
    for similar_user_id, distance in zip(similar_users, distances.flatten()):
        score = 1 - distance  # Convert distance to similarity score
        rated_mask = (user_item_matrix.loc[similar_user_id] > 0).values
        # Column order keeps tie-breaking deterministic.
        for movie_id in user_item_matrix.columns[rated_mask & ~seen_mask]:
            if movie_id not in title_by_id:
                # No title available to show; skip rather than raise.
                continue
            previous = best_score.get(movie_id)
            if previous is None or score > previous:
                best_score[movie_id] = score

    recommendations = [(title_by_id[movie_id], score) for movie_id, score in best_score.items()]
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return recommendations[:result_count]
# Gradio interface for collaborative filtering
def recommend_movies_cf(user_id):
    """Validate a user ID from the UI and format its recommendations as text.

    Args:
        user_id: Raw value from the input widget. It may be a number, a
            numeric string, or ``None`` when the field is left empty.

    Returns:
        A "Score Title" table with one recommendation per line, or an
        explanatory message when the input is not an integer user ID or
        the user is not in ``user_item_matrix``.
    """
    invalid_id_message = "Please enter a valid user ID (integer)."
    try:
        parsed_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: non-numeric text
        # or NaN; OverflowError: infinity.
        return invalid_id_message
    # int() truncates floats silently; 2.5 must not be looked up as user 2.
    if isinstance(user_id, float) and parsed_id != user_id:
        return invalid_id_message
    user_id = parsed_id

    if user_id not in user_item_matrix.index:
        return f"User ID {user_id} not found in the dataset."

    recommendations = get_cf_recommendations(user_id)
    format_string = "{:>5.2f} {:<20}"
    return "Score Title\n" + "\n".join(format_string.format(score, title) for title, score in recommendations)
# Build the two-tab Gradio UI: content-based and collaborative filtering.
# NOTE(review): `recommend_movies`, `movie_list` and `total_movies` are not
# defined in the visible part of this file; they must be provided by the
# content-based filtering code or this block raises NameError. Confirm.
with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        gr.Interface(fn=recommend_movies,
                     inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     title="Movie Recommender - Content-Based Filtering",
                     description="Select a movie to get recommendations based on content filtering.")
    with gr.Tab("Collaborative Filtering"):
        gr.Interface(fn=recommend_movies_cf,
                     inputs=gr.Number(label="Enter User ID"),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     title="Movie Recommender - Collaborative Filtering",
                     description="Enter a user ID to get movie recommendations based on collaborative filtering.")

# Launch the app
iface.launch()