hqasmei committed on
Commit
6322bb2
1 Parent(s): 269d6bf

Imported datasets and used gradio

Browse files
Files changed (2) hide show
  1. app.py +139 -4
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,7 +1,142 @@
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ from tensorflow import keras
6
+ from pathlib import Path
7
+ import matplotlib.pyplot as plt
8
  import gradio as gr
9
+ from huggingface_hub import from_pretrained_keras
10
+ from datasets import load_dataset
11
 
12
# Load the two CSV files of the dataset repository through `load_dataset`.
book_data_load = load_dataset("hqasmei/ml-capstone-project-dataset", data_files="book_data.csv")
filtered_data_load = load_dataset("hqasmei/ml-capstone-project-dataset", data_files="filtered_data.csv")

# Materialise every item of each "train" split into a plain list ...
book_data_arr = list(book_data_load['train'])
filtered_data_arr = list(filtered_data_load['train'])

# ... and turn those lists into the DataFrames the rest of the app works on.
book_df = pd.DataFrame(book_data_arr)
filtered_df = pd.DataFrame(filtered_data_arr)
26
+
27
# User encodings: position in the list of distinct user ids <-> raw user id.
user_ids = filtered_df["user_id"].unique().tolist()
user_encoded2user = dict(enumerate(user_ids))
user2user_encoded = {raw_id: index for index, raw_id in user_encoded2user.items()}
filtered_df["user"] = filtered_df["user_id"].map(user2user_encoded)
num_users = len(user2user_encoded)

# Book encodings: position in the list of distinct book ids <-> raw book id.
book_ids = filtered_df["book_id"].unique().tolist()
book_encoded2book = dict(enumerate(book_ids))
book2book_encoded = {raw_id: index for index, raw_id in book_encoded2book.items()}
filtered_df["book"] = filtered_df["book_id"].map(book2book_encoded)
num_books = len(book_encoded2book)

# Store ratings as 32-bit floats.
filtered_df["rating"] = filtered_df["rating"].astype(np.float32)

# Fetch the Keras model used by `get_recommendations` for rating prediction.
model = from_pretrained_keras('hqasmei/ml-model')
46
+
47
+
48
def update_user(id):
    """Build both result tables for the encoded user ``id``.

    Returns a 2-tuple: the user's top-rated books first, then the
    recommendations for that user.
    """
    top_rated = get_top_rated_books_from_user(id)
    recommendations = get_recommendations(id)
    return top_rated, recommendations
50
+
51
+
52
def get_top_rated_books_from_user(id):
    """Return up to five books the given user rated highest, best first.

    Args:
        id: Encoded user index (a key of ``user_encoded2user``). Integral
            floats such as ``3.0`` are accepted and coerced to ``int``.

    Returns:
        A DataFrame holding the matching ``book_df`` rows without the
        ``book_id`` column, ordered by descending rating. It is empty when
        the index is unknown or the user has no ratings.
    """
    decoded_id = user_encoded2user.get(int(id))

    # Rank this user's ratings; one row per book so a duplicated rating
    # cannot occupy several of the five slots.
    books_read_by_user = filtered_df[filtered_df["user_id"] == decoded_id]
    top_books_user = (
        books_read_by_user.sort_values(by="rating", ascending=False)
        .drop_duplicates(subset="book_id")
        .head(5)[["book_id"]]
    )

    # An inner merge keeps the order of the left keys, so the ranking
    # survives the lookup (an `isin` filter would fall back to the row
    # order of `book_df`).
    book_df_rows = top_books_user.merge(book_df, on="book_id", how="inner")
    return book_df_rows.drop("book_id", axis=1)
61
+
62
def random_user():
    """Return the ``update_user`` result for a uniformly drawn encoded user.

    ``np.random.randint`` excludes its upper bound, so the bound is
    ``num_users`` (not ``num_users - 1``): every index in
    ``0 .. num_users - 1`` can be drawn, and a single-user dataset no longer
    raises ``ValueError``.
    """
    return update_user(np.random.randint(0, num_users))
64
+
65
def get_recommendations(id):
    """Return the ten unread books with the highest predicted rating.

    Args:
        id: Encoded user index (a key of ``user_encoded2user``). Integral
            floats such as ``3.0`` are accepted and coerced to ``int``.

    Returns:
        A DataFrame holding the matching ``book_df`` rows without the
        ``book_id`` column, ordered by descending predicted rating. It is
        empty when there is no candidate book to score.
    """
    encoded_id = int(id)
    decoded_id = user_encoded2user.get(encoded_id)

    # Candidates: books in `book_df` that this user has not rated and that
    # have an encoding. Iterating `book_df` keeps the order deterministic.
    books_read_by_user = filtered_df[filtered_df["user_id"] == decoded_id]
    read_ids = set(books_read_by_user["book_id"])
    candidate_ids = [
        book_id
        for book_id in book_df["book_id"].unique()
        if book_id not in read_ids and book_id in book2book_encoded
    ]
    if not candidate_ids:
        # Nothing to score; avoid handing an empty array to the model.
        return book_df.iloc[0:0].drop("book_id", axis=1)

    # Create data [[user, book], ...] using the encoded indices.
    encoded_books = np.array([book2book_encoded[book_id] for book_id in candidate_ids])
    user_book_array = np.column_stack(
        (np.full(len(encoded_books), encoded_id), encoded_books)
    )

    # Predict ratings for books not read.
    ratings = model.predict(user_book_array).flatten()

    # Positions of the ten highest predictions, best first.
    top_ratings_indices = ratings.argsort()[-10:][::-1]
    recommended_book_ids = [candidate_ids[index] for index in top_ratings_indices]

    # An inner merge keeps the order of the left keys, so the ranking
    # survives the lookup (an `isin` filter would fall back to the row
    # order of `book_df`).
    ranked = pd.DataFrame({"book_id": recommended_book_ids})
    recommended_books = ranked.merge(book_df, on="book_id", how="inner")
    return recommended_books.drop("book_id", axis=1)
92
+
93
# Page layout: a user slider, a table with that user's top-rated books and a
# table with the recommendations; both tables refresh when the slider changes.
demo = gr.Blocks()

with demo:
    gr.Markdown("""
    <div>
    <h1 style='text-align: center'>Book Recommender</h1>
    Collaborative Filtering is used to predict the top 10 recommended books for a particular user from the dataset based on that user and previous books they have rated.

    Note: Currently there is a bug with sliders. If you "click and drag" on the slider it will not use the correct user. Please only "click" on the slider.
    </div>
    """)

    with gr.Box():
        gr.Markdown(
            """
            ### Input
            #### Select a user to get recommendations for.
            """)

        # step=1 pins the slider to whole user indices. Without it the
        # increment is chosen by Gradio from the range (NOTE(review): confirm
        # against the installed Gradio version), which can skip users or
        # emit fractional values that are not keys of `user_encoded2user`.
        inp1 = gr.Slider(0, num_users-1, value=0, step=1, label='User')

        gr.Markdown(
            """
            <br>
            """)
        gr.Markdown(
            """
            #### Books with the Highest Ratings from this user
            """)
        df1 = gr.DataFrame(headers=["title"], datatype=["str"], interactive=False)

    with gr.Box():
        gr.Markdown(
            """
            ### Output
            #### Top 10 book recommendations
            """)
        df2 = gr.DataFrame(headers=["title"], datatype=["str"], interactive=False)

    # `update_user` returns (top rated, recommendations), matching [df1, df2].
    inp1.change(fn=update_user,
                inputs=inp1,
                outputs=[df1, df2])


demo.launch(debug=True)
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- tensorflow
 
 
 
1
+ tensorflow
2
+ gdown
3
+ datasets