Spaces:
Sleeping
Sleeping
API
Browse files- DataSetSample.xlsx +0 -0
- Recommendation_System_in_Python.ipynb +392 -0
- app.py +88 -1
- movies.csv +0 -0
- sample.py +123 -0
DataSetSample.xlsx
ADDED
Binary file (24.4 kB). View file
|
|
Recommendation_System_in_Python.ipynb
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"colab": {
|
6 |
+
"provenance": []
|
7 |
+
},
|
8 |
+
"kernelspec": {
|
9 |
+
"name": "python3",
|
10 |
+
"display_name": "Python 3"
|
11 |
+
},
|
12 |
+
"language_info": {
|
13 |
+
"name": "python"
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"cells": [
|
17 |
+
{
|
18 |
+
"cell_type": "code",
|
19 |
+
"execution_count": null,
|
20 |
+
"metadata": {
|
21 |
+
"id": "SKRYfHwWyVaG"
|
22 |
+
},
|
23 |
+
"outputs": [],
|
24 |
+
"source": [
|
25 |
+
"# Importing Libraries\n",
|
26 |
+
"import numpy as np\n",
|
27 |
+
"import pandas as pd\n",
|
28 |
+
"import sklearn\n",
|
29 |
+
"import matplotlib.pyplot as plt\n",
|
30 |
+
"import seaborn as sns\n",
|
31 |
+
"import warnings\n",
|
32 |
+
"warnings.simplefilter(action='ignore', category=FutureWarning)"
|
33 |
+
]
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"cell_type": "code",
|
37 |
+
"source": [
|
38 |
+
"#loading rating dataset\n",
|
39 |
+
"ratings = pd.read_csv(\"https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv\")\n",
|
40 |
+
"print(ratings.head())"
|
41 |
+
],
|
42 |
+
"metadata": {
|
43 |
+
"colab": {
|
44 |
+
"base_uri": "https://localhost:8080/"
|
45 |
+
},
|
46 |
+
"id": "v_ZFn93Wy1ho",
|
47 |
+
"outputId": "97f98476-d909-4050-bc37-68369391d756"
|
48 |
+
},
|
49 |
+
"execution_count": null,
|
50 |
+
"outputs": [
|
51 |
+
{
|
52 |
+
"output_type": "stream",
|
53 |
+
"name": "stdout",
|
54 |
+
"text": [
|
55 |
+
" userId movieId rating timestamp\n",
|
56 |
+
"0 1 1 4.0 964982703\n",
|
57 |
+
"1 1 3 4.0 964981247\n",
|
58 |
+
"2 1 6 4.0 964982224\n",
|
59 |
+
"3 1 47 5.0 964983815\n",
|
60 |
+
"4 1 50 5.0 964982931\n"
|
61 |
+
]
|
62 |
+
}
|
63 |
+
]
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"cell_type": "code",
|
67 |
+
"source": [
|
68 |
+
"# loading movie dataset\n",
|
69 |
+
"movies = pd.read_csv(\"https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv\")\n",
|
70 |
+
"print(movies.head())"
|
71 |
+
],
|
72 |
+
"metadata": {
|
73 |
+
"colab": {
|
74 |
+
"base_uri": "https://localhost:8080/"
|
75 |
+
},
|
76 |
+
"id": "_RCPOQWfy269",
|
77 |
+
"outputId": "4c3c68a1-dbbb-4795-d96a-4f9c11d3731b"
|
78 |
+
},
|
79 |
+
"execution_count": null,
|
80 |
+
"outputs": [
|
81 |
+
{
|
82 |
+
"output_type": "stream",
|
83 |
+
"name": "stdout",
|
84 |
+
"text": [
|
85 |
+
" movieId title \\\n",
|
86 |
+
"0 1 Toy Story (1995) \n",
|
87 |
+
"1 2 Jumanji (1995) \n",
|
88 |
+
"2 3 Grumpier Old Men (1995) \n",
|
89 |
+
"3 4 Waiting to Exhale (1995) \n",
|
90 |
+
"4 5 Father of the Bride Part II (1995) \n",
|
91 |
+
"\n",
|
92 |
+
" genres \n",
|
93 |
+
"0 Adventure|Animation|Children|Comedy|Fantasy \n",
|
94 |
+
"1 Adventure|Children|Fantasy \n",
|
95 |
+
"2 Comedy|Romance \n",
|
96 |
+
"3 Comedy|Drama|Romance \n",
|
97 |
+
"4 Comedy \n"
|
98 |
+
]
|
99 |
+
}
|
100 |
+
]
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"cell_type": "code",
|
104 |
+
"source": [
|
105 |
+
"n_ratings = len(ratings)\n",
|
106 |
+
"n_movies = len(ratings['movieId'].unique())\n",
|
107 |
+
"n_users = len(ratings['userId'].unique())\n",
|
108 |
+
"\n",
|
109 |
+
"print(f\"Number of ratings: {n_ratings}\")\n",
|
110 |
+
"print(f\"Number of unique movieId's: {n_movies}\")\n",
|
111 |
+
"print(f\"Number of unique users: {n_users}\")\n",
|
112 |
+
"print(f\"Average ratings per user: {round(n_ratings/n_users, 2)}\")\n",
|
113 |
+
"print(f\"Average ratings per movie: {round(n_ratings/n_movies, 2)}\")"
|
114 |
+
],
|
115 |
+
"metadata": {
|
116 |
+
"colab": {
|
117 |
+
"base_uri": "https://localhost:8080/"
|
118 |
+
},
|
119 |
+
"id": "ypivRYgqy4kb",
|
120 |
+
"outputId": "360eef9e-9186-4ed6-ed50-fe8e6a3fabf0"
|
121 |
+
},
|
122 |
+
"execution_count": null,
|
123 |
+
"outputs": [
|
124 |
+
{
|
125 |
+
"output_type": "stream",
|
126 |
+
"name": "stdout",
|
127 |
+
"text": [
|
128 |
+
"Number of ratings: 100836\n",
|
129 |
+
"Number of unique movieId's: 9724\n",
|
130 |
+
"Number of unique users: 610\n",
|
131 |
+
"Average ratings per user: 165.3\n",
|
132 |
+
"Average ratings per movie: 10.37\n"
|
133 |
+
]
|
134 |
+
}
|
135 |
+
]
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"cell_type": "code",
|
139 |
+
"source": [
|
140 |
+
"user_freq = ratings[['userId', 'movieId']].groupby(\n",
|
141 |
+
" 'userId').count().reset_index()\n",
|
142 |
+
"user_freq.columns = ['userId', 'n_ratings']\n",
|
143 |
+
"print(user_freq.head())"
|
144 |
+
],
|
145 |
+
"metadata": {
|
146 |
+
"colab": {
|
147 |
+
"base_uri": "https://localhost:8080/"
|
148 |
+
},
|
149 |
+
"id": "PYZsye4-zAfi",
|
150 |
+
"outputId": "fb38061d-d9bc-4552-de9b-de418780ec32"
|
151 |
+
},
|
152 |
+
"execution_count": null,
|
153 |
+
"outputs": [
|
154 |
+
{
|
155 |
+
"output_type": "stream",
|
156 |
+
"name": "stdout",
|
157 |
+
"text": [
|
158 |
+
" userId n_ratings\n",
|
159 |
+
"0 1 232\n",
|
160 |
+
"1 2 29\n",
|
161 |
+
"2 3 39\n",
|
162 |
+
"3 4 216\n",
|
163 |
+
"4 5 44\n"
|
164 |
+
]
|
165 |
+
}
|
166 |
+
]
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"cell_type": "code",
|
170 |
+
"source": [
|
171 |
+
"# Find Lowest and Highest rated movies:\n",
|
172 |
+
"mean_rating = ratings.groupby('movieId')[['rating']].mean()\n",
|
173 |
+
"# Lowest rated movies\n",
|
174 |
+
"lowest_rated = mean_rating['rating'].idxmin()\n",
|
175 |
+
"movies.loc[movies['movieId'] == lowest_rated]\n",
|
176 |
+
"# Highest rated movies\n",
|
177 |
+
"highest_rated = mean_rating['rating'].idxmax()\n",
|
178 |
+
"movies.loc[movies['movieId'] == highest_rated]\n",
|
179 |
+
"# show number of people who rated movies rated movie highest\n",
|
180 |
+
"ratings[ratings['movieId']==highest_rated]\n",
|
181 |
+
"# show number of people who rated movies rated movie lowest\n",
|
182 |
+
"ratings[ratings['movieId']==lowest_rated]\n",
|
183 |
+
"\n",
|
184 |
+
"## the above movies has very low dataset. We will use bayesian average\n",
|
185 |
+
"movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])\n",
|
186 |
+
"movie_stats.columns = movie_stats.columns.droplevel()"
|
187 |
+
],
|
188 |
+
"metadata": {
|
189 |
+
"id": "H1s9d6QIzBzv"
|
190 |
+
},
|
191 |
+
"execution_count": null,
|
192 |
+
"outputs": []
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"cell_type": "code",
|
196 |
+
"source": [
|
197 |
+
"# Now, we create user-item matrix using scipy csr matrix\n",
|
198 |
+
"from scipy.sparse import csr_matrix\n",
|
199 |
+
"\n",
|
200 |
+
"def create_matrix(df):\n",
|
201 |
+
"\n",
|
202 |
+
" N = len(df['userId'].unique())\n",
|
203 |
+
" M = len(df['movieId'].unique())\n",
|
204 |
+
"\n",
|
205 |
+
" # Map Ids to indices\n",
|
206 |
+
" user_mapper = dict(zip(np.unique(df[\"userId\"]), list(range(N))))\n",
|
207 |
+
" movie_mapper = dict(zip(np.unique(df[\"movieId\"]), list(range(M))))\n",
|
208 |
+
"\n",
|
209 |
+
" # Map indices to IDs\n",
|
210 |
+
" user_inv_mapper = dict(zip(list(range(N)), np.unique(df[\"userId\"])))\n",
|
211 |
+
" movie_inv_mapper = dict(zip(list(range(M)), np.unique(df[\"movieId\"])))\n",
|
212 |
+
"\n",
|
213 |
+
" user_index = [user_mapper[i] for i in df['userId']]\n",
|
214 |
+
" movie_index = [movie_mapper[i] for i in df['movieId']]\n",
|
215 |
+
"\n",
|
216 |
+
" X = csr_matrix((df[\"rating\"], (movie_index, user_index)), shape=(M, N))\n",
|
217 |
+
"\n",
|
218 |
+
" return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper\n",
|
219 |
+
"\n",
|
220 |
+
"X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)"
|
221 |
+
],
|
222 |
+
"metadata": {
|
223 |
+
"id": "2tG23gzjzDLg"
|
224 |
+
},
|
225 |
+
"execution_count": null,
|
226 |
+
"outputs": []
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"cell_type": "code",
|
230 |
+
"source": [
|
231 |
+
"\"\"\"\n",
|
232 |
+
"Find similar movies using KNN\n",
|
233 |
+
"\"\"\"\n",
|
234 |
+
"from sklearn.neighbors import NearestNeighbors\n",
|
235 |
+
"def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):\n",
|
236 |
+
"\n",
|
237 |
+
" neighbour_ids = []\n",
|
238 |
+
"\n",
|
239 |
+
" movie_ind = movie_mapper[movie_id]\n",
|
240 |
+
" movie_vec = X[movie_ind]\n",
|
241 |
+
" k+=1\n",
|
242 |
+
" kNN = NearestNeighbors(n_neighbors=k, algorithm=\"brute\", metric=metric)\n",
|
243 |
+
" kNN.fit(X)\n",
|
244 |
+
" movie_vec = movie_vec.reshape(1,-1)\n",
|
245 |
+
" neighbour = kNN.kneighbors(movie_vec, return_distance=show_distance)\n",
|
246 |
+
" for i in range(0,k):\n",
|
247 |
+
" n = neighbour.item(i)\n",
|
248 |
+
" neighbour_ids.append(movie_inv_mapper[n])\n",
|
249 |
+
" neighbour_ids.pop(0)\n",
|
250 |
+
" return neighbour_ids\n",
|
251 |
+
"\n",
|
252 |
+
"\n",
|
253 |
+
"movie_titles = dict(zip(movies['movieId'], movies['title']))\n",
|
254 |
+
"\n",
|
255 |
+
"movie_id = 3\n",
|
256 |
+
"\n",
|
257 |
+
"similar_ids = find_similar_movies(movie_id, X, k=10)\n",
|
258 |
+
"movie_title = movie_titles[movie_id]\n",
|
259 |
+
"\n",
|
260 |
+
"print(f\"Since you watched {movie_title}\")\n",
|
261 |
+
"for i in similar_ids:\n",
|
262 |
+
" print(movie_titles[i])"
|
263 |
+
],
|
264 |
+
"metadata": {
|
265 |
+
"colab": {
|
266 |
+
"base_uri": "https://localhost:8080/"
|
267 |
+
},
|
268 |
+
"id": "onBGmGk5zGAZ",
|
269 |
+
"outputId": "c5a409ff-c16d-413f-9339-a2ff977eef69"
|
270 |
+
},
|
271 |
+
"execution_count": null,
|
272 |
+
"outputs": [
|
273 |
+
{
|
274 |
+
"output_type": "stream",
|
275 |
+
"name": "stdout",
|
276 |
+
"text": [
|
277 |
+
"Since you watched Grumpier Old Men (1995)\n",
|
278 |
+
"Grumpy Old Men (1993)\n",
|
279 |
+
"Striptease (1996)\n",
|
280 |
+
"Nutty Professor, The (1996)\n",
|
281 |
+
"Twister (1996)\n",
|
282 |
+
"Father of the Bride Part II (1995)\n",
|
283 |
+
"Broken Arrow (1996)\n",
|
284 |
+
"Bio-Dome (1996)\n",
|
285 |
+
"Truth About Cats & Dogs, The (1996)\n",
|
286 |
+
"Sabrina (1995)\n",
|
287 |
+
"Birdcage, The (1996)\n"
|
288 |
+
]
|
289 |
+
}
|
290 |
+
]
|
291 |
+
},
|
292 |
+
{
|
293 |
+
"cell_type": "code",
|
294 |
+
"source": [
|
295 |
+
"def recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10):\n",
|
296 |
+
" df1 = ratings[ratings['userId'] == user_id]\n",
|
297 |
+
"\n",
|
298 |
+
" if df1.empty:\n",
|
299 |
+
" print(f\"User with ID {user_id} does not exist.\")\n",
|
300 |
+
" return\n",
|
301 |
+
"\n",
|
302 |
+
" movie_id = df1[df1['rating'] == max(df1['rating'])]['movieId'].iloc[0]\n",
|
303 |
+
"\n",
|
304 |
+
" movie_titles = dict(zip(movies['movieId'], movies['title']))\n",
|
305 |
+
"\n",
|
306 |
+
" similar_ids = find_similar_movies(movie_id, X, k)\n",
|
307 |
+
" movie_title = movie_titles.get(movie_id, \"Movie not found\")\n",
|
308 |
+
"\n",
|
309 |
+
" if movie_title == \"Movie not found\":\n",
|
310 |
+
" print(f\"Movie with ID {movie_id} not found.\")\n",
|
311 |
+
" return\n",
|
312 |
+
"\n",
|
313 |
+
" print(f\"Since you watched {movie_title}, you might also like:\")\n",
|
314 |
+
" for i in similar_ids:\n",
|
315 |
+
" print(movie_titles.get(i, \"Movie not found\"))"
|
316 |
+
],
|
317 |
+
"metadata": {
|
318 |
+
"id": "PrN_SjhMzHxy"
|
319 |
+
},
|
320 |
+
"execution_count": null,
|
321 |
+
"outputs": []
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"cell_type": "code",
|
325 |
+
"source": [
|
326 |
+
"user_id = 150 # Replace with the desired user ID\n",
|
327 |
+
"recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10)"
|
328 |
+
],
|
329 |
+
"metadata": {
|
330 |
+
"colab": {
|
331 |
+
"base_uri": "https://localhost:8080/"
|
332 |
+
},
|
333 |
+
"id": "L13wNuTOzJry",
|
334 |
+
"outputId": "5316c18d-8323-4fc8-8ed0-a503edf93f29"
|
335 |
+
},
|
336 |
+
"execution_count": null,
|
337 |
+
"outputs": [
|
338 |
+
{
|
339 |
+
"output_type": "stream",
|
340 |
+
"name": "stdout",
|
341 |
+
"text": [
|
342 |
+
"Since you watched Twelve Monkeys (a.k.a. 12 Monkeys) (1995), you might also like:\n",
|
343 |
+
"Pulp Fiction (1994)\n",
|
344 |
+
"Terminator 2: Judgment Day (1991)\n",
|
345 |
+
"Independence Day (a.k.a. ID4) (1996)\n",
|
346 |
+
"Seven (a.k.a. Se7en) (1995)\n",
|
347 |
+
"Fargo (1996)\n",
|
348 |
+
"Fugitive, The (1993)\n",
|
349 |
+
"Usual Suspects, The (1995)\n",
|
350 |
+
"Jurassic Park (1993)\n",
|
351 |
+
"Star Wars: Episode IV - A New Hope (1977)\n",
|
352 |
+
"Heat (1995)\n"
|
353 |
+
]
|
354 |
+
}
|
355 |
+
]
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"cell_type": "code",
|
359 |
+
"source": [
|
360 |
+
"user_id = 415 # Replace with the desired user ID\n",
|
361 |
+
"recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10)"
|
362 |
+
],
|
363 |
+
"metadata": {
|
364 |
+
"colab": {
|
365 |
+
"base_uri": "https://localhost:8080/"
|
366 |
+
},
|
367 |
+
"id": "pEZ5ISP8zLB1",
|
368 |
+
"outputId": "e61b8c3f-db5d-4c49-b876-5bc19a490ce4"
|
369 |
+
},
|
370 |
+
"execution_count": null,
|
371 |
+
"outputs": [
|
372 |
+
{
|
373 |
+
"output_type": "stream",
|
374 |
+
"name": "stdout",
|
375 |
+
"text": [
|
376 |
+
"Since you watched Pulp Fiction (1994), you might also like:\n",
|
377 |
+
"Silence of the Lambs, The (1991)\n",
|
378 |
+
"Shawshank Redemption, The (1994)\n",
|
379 |
+
"Seven (a.k.a. Se7en) (1995)\n",
|
380 |
+
"Forrest Gump (1994)\n",
|
381 |
+
"Usual Suspects, The (1995)\n",
|
382 |
+
"Braveheart (1995)\n",
|
383 |
+
"Fight Club (1999)\n",
|
384 |
+
"Fargo (1996)\n",
|
385 |
+
"Terminator 2: Judgment Day (1991)\n",
|
386 |
+
"Reservoir Dogs (1992)\n"
|
387 |
+
]
|
388 |
+
}
|
389 |
+
]
|
390 |
+
}
|
391 |
+
]
|
392 |
+
}
|
app.py
CHANGED
@@ -1,8 +1,16 @@
|
|
1 |
#Fast APi Packages
|
2 |
-
from fastapi import FastAPI,File
|
3 |
from pydantic import BaseModel
|
4 |
import json
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
#SkillExtraction Packages
|
7 |
import psycopg2
|
8 |
import pandas as pd
|
@@ -39,6 +47,15 @@ nlp = spacy.load("en_core_web_lg")
|
|
39 |
skill_extractor = SkillExtractor(nlp, SKILL_DB, PhraseMatcher)
|
40 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
@app.get("/")
|
43 |
async def root():
|
44 |
return {"Recommendation":"Recommendation Version 1.00, https://vaibhav84-recommendation.hf.space/redoc , https://vaibhav84-recommendation.hf.space/docs"}
|
@@ -58,6 +75,31 @@ def UploadJobDescription(CustomerID : str, CustomerPwd: str):
|
|
58 |
except Exception as e:
|
59 |
return "An error occurred: {e}"
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
@app.post("/UploadJobDescription/")
|
62 |
async def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
|
63 |
try:
|
@@ -87,6 +129,51 @@ def UpdateSkills(Skills : ClassModals.Modals.UpdateSkillDetails):
|
|
87 |
def AllSkills():
|
88 |
return (AddSkill.GetSkillDetails())
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
#return JSONResponse(content={"message": "Here's your interdimensional portal." , "mes1":"data2"})
|
91 |
#https://vaibhav84-resumeapi.hf.space/docs
|
92 |
#https://vaibhav84-resumeapi.hf.space/redoc d
|
|
|
1 |
#Fast APi Packages
|
2 |
+
from fastapi import FastAPI,File, HTTPException
|
3 |
from pydantic import BaseModel
|
4 |
import json
|
5 |
|
6 |
+
|
7 |
+
from typing import List, Dict, Any
|
8 |
+
import pandas as pd
|
9 |
+
import numpy as np
|
10 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
11 |
+
from scipy import sparse
|
12 |
+
from datetime import datetime
|
13 |
+
|
14 |
#SkillExtraction Packages
|
15 |
import psycopg2
|
16 |
import pandas as pd
|
|
|
47 |
skill_extractor = SkillExtractor(nlp, SKILL_DB, PhraseMatcher)
|
48 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
49 |
|
50 |
+
purchase_history = pd.read_excel('datasetsample.xlsx', sheet_name='Transaction History',
|
51 |
+
parse_dates=['Purchase_Date'])
|
52 |
+
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
|
53 |
+
product_categories = purchase_history[['Product_Id', 'Category']].drop_duplicates().set_index('Product_Id')['Category'].to_dict()
|
54 |
+
purchase_counts = purchase_history.groupby(['Customer_Id', 'Product_Id']).size().unstack(fill_value=0)
|
55 |
+
sparse_purchase_counts = sparse.csr_matrix(purchase_counts)
|
56 |
+
cosine_similarities = cosine_similarity(sparse_purchase_counts.T)
|
57 |
+
|
58 |
+
|
59 |
@app.get("/")
|
60 |
async def root():
|
61 |
return {"Recommendation":"Recommendation Version 1.00, https://vaibhav84-recommendation.hf.space/redoc , https://vaibhav84-recommendation.hf.space/docs"}
|
|
|
75 |
except Exception as e:
|
76 |
return "An error occurred: {e}"
|
77 |
|
78 |
+
@app.get("/recommendations/{customer_id}")
async def get_recommendations(customer_id: str, n: int = 5):
    """
    Get recommendations for a customer

    Parameters:
    - customer_id: The ID of the customer
    - n: Number of recommendations to return (default: 5)

    Returns:
    - JSON object containing purchase history and recommendations

    Raises:
    - HTTPException(404) when the customer ID is unknown or processing fails
    """
    try:
        purchased_items, recommended_items = get_customer_items_and_recommendations(customer_id, n)

        # The helper returns ([], []) for an unknown customer; surface that as
        # an explicit 404 instead of a misleading 200 with empty payloads
        # (previously the advertised 404 path could never fire for this case).
        if not purchased_items and not recommended_items:
            raise HTTPException(status_code=404,
                                detail=f"Error processing customer ID: {customer_id}. Customer not found.")

        return {
            "customer_id": customer_id,
            "purchase_history": purchased_items,
            "recommendations": recommended_items
        }
    except HTTPException:
        # Re-raise our own 404 unchanged rather than re-wrapping it below.
        raise
    except Exception as e:
        # Preserve the original contract: any processing failure maps to 404.
        # Chain the cause so the traceback keeps the underlying error.
        raise HTTPException(status_code=404, detail=f"Error processing customer ID: {customer_id}. {str(e)}") from e
|
100 |
+
|
101 |
+
|
102 |
+
|
103 |
@app.post("/UploadJobDescription/")
|
104 |
async def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
|
105 |
try:
|
|
|
129 |
def AllSkills():
|
130 |
return (AddSkill.GetSkillDetails())
|
131 |
|
132 |
+
|
133 |
+
def get_customer_items_and_recommendations(user_id: str, n: int = 5) -> tuple[List[Dict], List[Dict]]:
    """
    Get both purchased items and recommendations for a user.

    Returns a pair (purchase-history records, recommended-product records);
    both lists are empty when the customer ID is unknown.
    """
    customer = str(user_id)  # index keys are strings; normalise defensively

    # Unknown customer -> nothing to report.
    if customer not in purchase_counts.index:
        return [], []

    # Products this customer has bought at least once.
    owned = list(purchase_counts.columns[purchase_counts.loc[customer] > 0])
    customer_rows = purchase_history[purchase_history['Customer_Id'] == customer]

    owned_info = []
    for product in owned:
        rows = customer_rows[customer_rows['Product_Id'] == product]
        owned_info.append({
            'product_id': product,
            'category': product_categories.get(product, 'Unknown'),
            'total_amount': float(rows['Amount (In Dollars)'].sum()),
            'last_purchase': pd.to_datetime(rows['Purchase_Date'].max()).strftime('%Y-%m-%d'),
        })

    # Item-based collaborative filtering: score every product by similarity
    # to the customer's purchase vector, zero out what they already own,
    # then keep the top-n scores.
    row_idx = purchase_counts.index.get_loc(customer)
    vector = sparse_purchase_counts[row_idx].toarray().flatten()
    scores = cosine_similarities.dot(vector)
    scores[np.where(vector > 0)[0]] = 0
    top = np.argsort(scores)[::-1][:n]

    # Belt-and-braces: drop any already-owned product that survived the cut.
    picks = [p for p in list(purchase_counts.columns[top]) if p not in owned]

    picks_info = [
        {
            'product_id': p,
            'category': product_categories.get(p, 'Unknown')
        }
        for p in picks
    ]

    return owned_info, picks_info
|
177 |
#return JSONResponse(content={"message": "Here's your interdimensional portal." , "mes1":"data2"})
|
178 |
#https://vaibhav84-resumeapi.hf.space/docs
|
179 |
#https://vaibhav84-resumeapi.hf.space/redoc d
|
movies.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
sample.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# --- Build the item-based recommendation model from purchase data ---
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

# Transaction history; Purchase_Date is parsed up-front so the later
# max()/strftime calls operate on real datetimes.
purchase_history = pd.read_excel('datasetsample.xlsx', sheet_name='Transaction History',
                                 parse_dates=['Purchase_Date'])

# Customer IDs are compared as strings throughout the rest of the module.
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)

# Product_Id -> Category lookup table.
product_categories = (purchase_history[['Product_Id', 'Category']]
                      .drop_duplicates()
                      .set_index('Product_Id')['Category']
                      .to_dict())

# Customer x Product purchase-count matrix (rows: Customer_Id, cols: Product_Id).
purchase_counts = purchase_history.groupby(['Customer_Id', 'Product_Id']).size().unstack(fill_value=0)

# Sparse copy plus the product-product cosine-similarity matrix (note the
# transpose: similarities are computed between PRODUCTS, not customers).
sparse_purchase_counts = sparse.csr_matrix(purchase_counts)
cosine_similarities = cosine_similarity(sparse_purchase_counts.T)
|
24 |
+
|
25 |
+
def get_customer_items_and_recommendations(user_id, n=5):
    """
    Get both purchased items and recommendations for a user

    Parameters:
    user_id (str): The customer ID as string
    n (int): Number of recommendations to return

    Returns:
    tuple: (purchased_items_info, recommended_items_info)
    """
    customer = str(user_id)  # normalise: matrix index keys are strings

    # Unknown customers yield empty results rather than an error.
    if customer not in purchase_counts.index:
        return [], []

    # Every product this customer has bought at least once.
    purchased_items = list(purchase_counts.columns[purchase_counts.loc[customer] > 0])

    customer_rows = purchase_history[purchase_history['Customer_Id'] == customer]
    purchased_items_info = []
    for product in purchased_items:
        product_rows = customer_rows[customer_rows['Product_Id'] == product]
        purchased_items_info.append({
            'product_id': product,
            'category': product_categories.get(product, 'Unknown'),
            'total_amount': product_rows['Amount (In Dollars)'].sum(),
            'last_purchase': pd.to_datetime(product_rows['Purchase_Date'].max()),  # ensure datetime
        })

    # Score every product by cosine similarity to this customer's
    # purchase-history vector.
    row = purchase_counts.index.get_loc(customer)
    history_vec = sparse_purchase_counts[row].toarray().flatten()
    scores = cosine_similarities.dot(history_vec)

    # Never recommend something already bought: zero its score before the
    # top-n cut, then filter again afterwards as a safety net.
    scores[np.where(history_vec > 0)[0]] = 0
    top_n = np.argsort(scores)[::-1][:n]
    candidates = [p for p in list(purchase_counts.columns[top_n]) if p not in purchased_items]

    recommended_items_info = [
        {
            'product_id': p,
            'category': product_categories.get(p, 'Unknown')
        }
        for p in candidates
    ]

    return purchased_items_info, recommended_items_info
|
87 |
+
|
88 |
+
def display_customer_profile(user_id, n=5):
    """
    Display customer's purchase history and recommendations with categories

    Parameters:
    user_id (str): The customer ID as string
    n (int): Number of recommendations to return
    """
    history, suggestions = get_customer_items_and_recommendations(user_id, n)

    wide_rule = "-" * 70  # shared separator for the wide sections

    print(f"\nCustomer Profile for {user_id}")
    print(wide_rule)

    print("\nPurchase History:")
    if history:
        print(f"{'Product ID':<15} {'Category':<20} {'Total Amount':>12} {'Last Purchase'}")
        print(wide_rule)
        for record in history:
            print(f"{record['product_id']:<15} {record['category']:<20} ${record['total_amount']:>11.2f} {record['last_purchase'].strftime('%Y-%m-%d')}")
    else:
        print("No purchase history found")

    print("\nRecommended Items:")
    if suggestions:
        print(f"{'Product ID':<15} {'Category':<20}")
        print("-" * 35)
        for record in suggestions:
            print(f"{record['product_id']:<15} {record['category']:<20}")
    else:
        print("No recommendations available")

    print(wide_rule)


# Example usage:
customer_id = 'CUST2025A'  # Replace with your actual customer ID
display_customer_profile(customer_id)
|