File size: 3,692 Bytes
725c528
eceea93
 
 
725c528
30059ee
88a2d43
 
e28d195
35aecae
725c528
02f4def
 
30059ee
 
eceea93
 
725c528
eceea93
 
725c528
eceea93
 
725c528
eceea93
 
 
725c528
eceea93
 
 
 
725c528
eceea93
 
dd07bd3
eceea93
 
 
 
 
 
 
 
 
 
1581906
eceea93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
993d08d
c2e19f2
725c528
eceea93
7dbbd88
 
 
88a2d43
9dba8e7
c2e19f2
 
7dbbd88
 
eceea93
 
 
 
 
 
725c528
eceea93
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import gradio as gr
import zipfile
import random

# Sizing knobs: `input_count` is quoted in the movie dropdown label, and
# `result_count` is both the neighbour count requested per query and the
# maximum number of recommendations returned.
input_count = 300
result_count = 21

# Read the two MovieLens tables directly from the zip archive.
with zipfile.ZipFile('ml-latest-small.zip') as archive:
    with archive.open('ml-latest-small/movies.csv') as movies_file:
        movies = pd.read_csv(movies_file)
    with archive.open('ml-latest-small/ratings.csv') as ratings_file:
        ratings = pd.read_csv(ratings_file)

# One row per user, one column per movie; a missing rating is stored as 0.
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')
user_item_matrix = user_item_matrix.fillna(0)

# Same data in compressed sparse row form for the neighbour search.
user_item_matrix_sparse = csr_matrix(user_item_matrix.to_numpy())

# Brute-force cosine-distance neighbour index over the user rows.
model_knn = NearestNeighbors(n_neighbors=20, metric='cosine', algorithm='brute', n_jobs=-1)
model_knn.fit(user_item_matrix_sparse)

# Function to get movie recommendations using collaborative filtering
def get_cf_recommendations(user_id, user_item_matrix=user_item_matrix, model_knn=model_knn, movies=movies):
    """Recommend movies the user has not rated, taken from the most similar users.

    The user's rating row is used as a query against ``model_knn``. Every movie
    rated (> 0) by a neighbour but not by the user becomes a candidate, scored
    with the similarity (``1 - cosine distance``) of the closest neighbour that
    rated it. Each movie appears at most once in the result.

    Args:
        user_id: Label of the user in ``user_item_matrix.index``.
        user_item_matrix: DataFrame of ratings, users as rows and movie IDs as
            columns, with 0 meaning "not rated". Row order must match the data
            ``model_knn`` was fitted on.
        model_knn: Fitted neighbour model exposing ``kneighbors``.
        movies: DataFrame with ``movieId`` and ``title`` columns.

    Returns:
        A list of ``(title, score)`` tuples, best score first, with at most
        ``result_count`` entries. Empty if ``user_id`` is not in the matrix.
    """
    if user_id not in user_item_matrix.index:
        return []

    user_ratings = user_item_matrix.loc[user_id].to_numpy()

    # kneighbors raises if asked for more neighbours than fitted samples,
    # so cap the request for small datasets.
    n_fitted = getattr(model_knn, 'n_samples_fit_', len(user_item_matrix.index))
    distances, indices = model_knn.kneighbors(
        user_ratings.reshape(1, -1),
        n_neighbors=min(result_count, n_fitted),
    )

    # Build the id -> title lookup once instead of scanning `movies` for every
    # candidate; keep the first title per id, as the original lookup did.
    unique_movies = movies.drop_duplicates(subset='movieId')
    title_by_id = dict(zip(unique_movies['movieId'], unique_movies['title']))

    unseen = ~(user_ratings > 0)
    movie_ids = user_item_matrix.columns
    user_ids = user_item_matrix.index

    # movie_id -> best similarity among the neighbours that rated it.
    best_score = {}
    for position, distance in zip(indices.ravel(), distances.ravel()):
        if user_ids[position] == user_id:
            # The query user is its own nearest neighbour and adds nothing.
            continue
        score = float(1 - distance)  # Convert distance to similarity score
        neighbour_rated = user_item_matrix.iloc[position].to_numpy() > 0
        for movie_id in movie_ids[neighbour_rated & unseen]:
            # Movies without a known title are skipped rather than crashing.
            if movie_id in title_by_id and score > best_score.get(movie_id, float('-inf')):
                best_score[movie_id] = score

    # Highest score first; ties broken by title so the output is deterministic.
    ranked = sorted(
        best_score.items(),
        key=lambda item: (-item[1], str(title_by_id[item[0]])),
    )
    return [(title_by_id[movie_id], score) for movie_id, score in ranked[:result_count]]

# Gradio interface for collaborative filtering
def recommend_movies_cf(user_id):
    """Format collaborative-filtering recommendations for one user as text.

    Used as the callback of the "Collaborative Filtering" tab.

    Args:
        user_id: Raw value from the user-ID input. Accepted if it converts
            cleanly to an integer (e.g. ``5``, ``5.0``, ``"5"``).

    Returns:
        A string: either an error message (invalid or unknown ID) or a
        "Score / Title" table with one recommendation per line.
    """
    invalid_id_message = "Please enter a valid user ID (integer)."

    # Reject fractional and non-finite floats (3.7, nan, inf) up front;
    # int() would silently truncate 3.7 to user 3.
    if isinstance(user_id, float) and not user_id.is_integer():
        return invalid_id_message

    try:
        user_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None (e.g. an empty input box), which the original
        # ValueError-only handler let escape as an unhandled exception.
        return invalid_id_message

    if user_id not in user_item_matrix.index:
        return f"User ID {user_id} not found in the dataset."

    recommendations = get_cf_recommendations(user_id)
    rows = [f"{score:>5.2f}       {title:<20}" for title, score in recommendations]
    return "Score     Title\n" + "\n".join(rows)

# Update the existing Gradio interface
# Two-tab UI: each tab wraps one recommender callback in its own gr.Interface.
# Components are created inside the `with` contexts, so statement order here
# determines the layout.
with gr.Blocks() as iface:
    # NOTE(review): recommend_movies, movie_list and total_movies are not
    # defined in the visible source -- confirm they are defined or imported
    # before this point, otherwise building this tab raises NameError.
    with gr.Tab("Content-Based Filtering"):
        gr.Interface(fn=recommend_movies, 
                     inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"), 
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     title="Movie Recommender - Content-Based Filtering", 
                     description="Select a movie to get recommendations based on content filtering.")
    
    # The user ID is entered in a number box; recommend_movies_cf returns the
    # text shown in the output textbox.
    with gr.Tab("Collaborative Filtering"):
        gr.Interface(fn=recommend_movies_cf,
                     inputs=gr.Number(label="Enter User ID"),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     title="Movie Recommender - Collaborative Filtering",
                     description="Enter a user ID to get movie recommendations based on collaborative filtering.")

# Launch the app (runs whenever this module is executed or imported; there is
# no __main__ guard).
iface.launch()