Vaibhav84 commited on
Commit
d034937
·
1 Parent(s): 8f0089b
Files changed (5) hide show
  1. DataSetSample.xlsx +0 -0
  2. Recommendation_System_in_Python.ipynb +392 -0
  3. app.py +88 -1
  4. movies.csv +0 -0
  5. sample.py +123 -0
DataSetSample.xlsx ADDED
Binary file (24.4 kB). View file
 
Recommendation_System_in_Python.ipynb ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "metadata": {
21
+ "id": "SKRYfHwWyVaG"
22
+ },
23
+ "outputs": [],
24
+ "source": [
25
+ "# Importing Libraries\n",
26
+ "import numpy as np\n",
27
+ "import pandas as pd\n",
28
+ "import sklearn\n",
29
+ "import matplotlib.pyplot as plt\n",
30
+ "import seaborn as sns\n",
31
+ "import warnings\n",
32
+ "warnings.simplefilter(action='ignore', category=FutureWarning)"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "source": [
38
+ "#loading rating dataset\n",
39
+ "ratings = pd.read_csv(\"https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv\")\n",
40
+ "print(ratings.head())"
41
+ ],
42
+ "metadata": {
43
+ "colab": {
44
+ "base_uri": "https://localhost:8080/"
45
+ },
46
+ "id": "v_ZFn93Wy1ho",
47
+ "outputId": "97f98476-d909-4050-bc37-68369391d756"
48
+ },
49
+ "execution_count": null,
50
+ "outputs": [
51
+ {
52
+ "output_type": "stream",
53
+ "name": "stdout",
54
+ "text": [
55
+ " userId movieId rating timestamp\n",
56
+ "0 1 1 4.0 964982703\n",
57
+ "1 1 3 4.0 964981247\n",
58
+ "2 1 6 4.0 964982224\n",
59
+ "3 1 47 5.0 964983815\n",
60
+ "4 1 50 5.0 964982931\n"
61
+ ]
62
+ }
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "source": [
68
+ "# loading movie dataset\n",
69
+ "movies = pd.read_csv(\"https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv\")\n",
70
+ "print(movies.head())"
71
+ ],
72
+ "metadata": {
73
+ "colab": {
74
+ "base_uri": "https://localhost:8080/"
75
+ },
76
+ "id": "_RCPOQWfy269",
77
+ "outputId": "4c3c68a1-dbbb-4795-d96a-4f9c11d3731b"
78
+ },
79
+ "execution_count": null,
80
+ "outputs": [
81
+ {
82
+ "output_type": "stream",
83
+ "name": "stdout",
84
+ "text": [
85
+ " movieId title \\\n",
86
+ "0 1 Toy Story (1995) \n",
87
+ "1 2 Jumanji (1995) \n",
88
+ "2 3 Grumpier Old Men (1995) \n",
89
+ "3 4 Waiting to Exhale (1995) \n",
90
+ "4 5 Father of the Bride Part II (1995) \n",
91
+ "\n",
92
+ " genres \n",
93
+ "0 Adventure|Animation|Children|Comedy|Fantasy \n",
94
+ "1 Adventure|Children|Fantasy \n",
95
+ "2 Comedy|Romance \n",
96
+ "3 Comedy|Drama|Romance \n",
97
+ "4 Comedy \n"
98
+ ]
99
+ }
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "source": [
105
+ "n_ratings = len(ratings)\n",
106
+ "n_movies = len(ratings['movieId'].unique())\n",
107
+ "n_users = len(ratings['userId'].unique())\n",
108
+ "\n",
109
+ "print(f\"Number of ratings: {n_ratings}\")\n",
110
+ "print(f\"Number of unique movieId's: {n_movies}\")\n",
111
+ "print(f\"Number of unique users: {n_users}\")\n",
112
+ "print(f\"Average ratings per user: {round(n_ratings/n_users, 2)}\")\n",
113
+ "print(f\"Average ratings per movie: {round(n_ratings/n_movies, 2)}\")"
114
+ ],
115
+ "metadata": {
116
+ "colab": {
117
+ "base_uri": "https://localhost:8080/"
118
+ },
119
+ "id": "ypivRYgqy4kb",
120
+ "outputId": "360eef9e-9186-4ed6-ed50-fe8e6a3fabf0"
121
+ },
122
+ "execution_count": null,
123
+ "outputs": [
124
+ {
125
+ "output_type": "stream",
126
+ "name": "stdout",
127
+ "text": [
128
+ "Number of ratings: 100836\n",
129
+ "Number of unique movieId's: 9724\n",
130
+ "Number of unique users: 610\n",
131
+ "Average ratings per user: 165.3\n",
132
+ "Average ratings per movie: 10.37\n"
133
+ ]
134
+ }
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "source": [
140
+ "user_freq = ratings[['userId', 'movieId']].groupby(\n",
141
+ " 'userId').count().reset_index()\n",
142
+ "user_freq.columns = ['userId', 'n_ratings']\n",
143
+ "print(user_freq.head())"
144
+ ],
145
+ "metadata": {
146
+ "colab": {
147
+ "base_uri": "https://localhost:8080/"
148
+ },
149
+ "id": "PYZsye4-zAfi",
150
+ "outputId": "fb38061d-d9bc-4552-de9b-de418780ec32"
151
+ },
152
+ "execution_count": null,
153
+ "outputs": [
154
+ {
155
+ "output_type": "stream",
156
+ "name": "stdout",
157
+ "text": [
158
+ " userId n_ratings\n",
159
+ "0 1 232\n",
160
+ "1 2 29\n",
161
+ "2 3 39\n",
162
+ "3 4 216\n",
163
+ "4 5 44\n"
164
+ ]
165
+ }
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "source": [
171
+ "# Find Lowest and Highest rated movies:\n",
172
+ "mean_rating = ratings.groupby('movieId')[['rating']].mean()\n",
173
+ "# Lowest rated movies\n",
174
+ "lowest_rated = mean_rating['rating'].idxmin()\n",
175
+ "movies.loc[movies['movieId'] == lowest_rated]\n",
176
+ "# Highest rated movies\n",
177
+ "highest_rated = mean_rating['rating'].idxmax()\n",
178
+ "movies.loc[movies['movieId'] == highest_rated]\n",
179
+ "# show number of people who rated movies rated movie highest\n",
180
+ "ratings[ratings['movieId']==highest_rated]\n",
181
+ "# show number of people who rated movies rated movie lowest\n",
182
+ "ratings[ratings['movieId']==lowest_rated]\n",
183
+ "\n",
184
+ "## the above movies has very low dataset. We will use bayesian average\n",
185
+ "movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])\n",
186
+ "movie_stats.columns = movie_stats.columns.droplevel()"
187
+ ],
188
+ "metadata": {
189
+ "id": "H1s9d6QIzBzv"
190
+ },
191
+ "execution_count": null,
192
+ "outputs": []
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "source": [
197
+ "# Now, we create user-item matrix using scipy csr matrix\n",
198
+ "from scipy.sparse import csr_matrix\n",
199
+ "\n",
200
+ "def create_matrix(df):\n",
201
+ "\n",
202
+ " N = len(df['userId'].unique())\n",
203
+ " M = len(df['movieId'].unique())\n",
204
+ "\n",
205
+ " # Map Ids to indices\n",
206
+ " user_mapper = dict(zip(np.unique(df[\"userId\"]), list(range(N))))\n",
207
+ " movie_mapper = dict(zip(np.unique(df[\"movieId\"]), list(range(M))))\n",
208
+ "\n",
209
+ " # Map indices to IDs\n",
210
+ " user_inv_mapper = dict(zip(list(range(N)), np.unique(df[\"userId\"])))\n",
211
+ " movie_inv_mapper = dict(zip(list(range(M)), np.unique(df[\"movieId\"])))\n",
212
+ "\n",
213
+ " user_index = [user_mapper[i] for i in df['userId']]\n",
214
+ " movie_index = [movie_mapper[i] for i in df['movieId']]\n",
215
+ "\n",
216
+ " X = csr_matrix((df[\"rating\"], (movie_index, user_index)), shape=(M, N))\n",
217
+ "\n",
218
+ " return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper\n",
219
+ "\n",
220
+ "X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)"
221
+ ],
222
+ "metadata": {
223
+ "id": "2tG23gzjzDLg"
224
+ },
225
+ "execution_count": null,
226
+ "outputs": []
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "source": [
231
+ "\"\"\"\n",
232
+ "Find similar movies using KNN\n",
233
+ "\"\"\"\n",
234
+ "from sklearn.neighbors import NearestNeighbors\n",
235
+ "def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):\n",
236
+ "\n",
237
+ " neighbour_ids = []\n",
238
+ "\n",
239
+ " movie_ind = movie_mapper[movie_id]\n",
240
+ " movie_vec = X[movie_ind]\n",
241
+ " k+=1\n",
242
+ " kNN = NearestNeighbors(n_neighbors=k, algorithm=\"brute\", metric=metric)\n",
243
+ " kNN.fit(X)\n",
244
+ " movie_vec = movie_vec.reshape(1,-1)\n",
245
+ " neighbour = kNN.kneighbors(movie_vec, return_distance=show_distance)\n",
246
+ " for i in range(0,k):\n",
247
+ " n = neighbour.item(i)\n",
248
+ " neighbour_ids.append(movie_inv_mapper[n])\n",
249
+ " neighbour_ids.pop(0)\n",
250
+ " return neighbour_ids\n",
251
+ "\n",
252
+ "\n",
253
+ "movie_titles = dict(zip(movies['movieId'], movies['title']))\n",
254
+ "\n",
255
+ "movie_id = 3\n",
256
+ "\n",
257
+ "similar_ids = find_similar_movies(movie_id, X, k=10)\n",
258
+ "movie_title = movie_titles[movie_id]\n",
259
+ "\n",
260
+ "print(f\"Since you watched {movie_title}\")\n",
261
+ "for i in similar_ids:\n",
262
+ " print(movie_titles[i])"
263
+ ],
264
+ "metadata": {
265
+ "colab": {
266
+ "base_uri": "https://localhost:8080/"
267
+ },
268
+ "id": "onBGmGk5zGAZ",
269
+ "outputId": "c5a409ff-c16d-413f-9339-a2ff977eef69"
270
+ },
271
+ "execution_count": null,
272
+ "outputs": [
273
+ {
274
+ "output_type": "stream",
275
+ "name": "stdout",
276
+ "text": [
277
+ "Since you watched Grumpier Old Men (1995)\n",
278
+ "Grumpy Old Men (1993)\n",
279
+ "Striptease (1996)\n",
280
+ "Nutty Professor, The (1996)\n",
281
+ "Twister (1996)\n",
282
+ "Father of the Bride Part II (1995)\n",
283
+ "Broken Arrow (1996)\n",
284
+ "Bio-Dome (1996)\n",
285
+ "Truth About Cats & Dogs, The (1996)\n",
286
+ "Sabrina (1995)\n",
287
+ "Birdcage, The (1996)\n"
288
+ ]
289
+ }
290
+ ]
291
+ },
292
+ {
293
+ "cell_type": "code",
294
+ "source": [
295
+ "def recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10):\n",
296
+ " df1 = ratings[ratings['userId'] == user_id]\n",
297
+ "\n",
298
+ " if df1.empty:\n",
299
+ " print(f\"User with ID {user_id} does not exist.\")\n",
300
+ " return\n",
301
+ "\n",
302
+ " movie_id = df1[df1['rating'] == max(df1['rating'])]['movieId'].iloc[0]\n",
303
+ "\n",
304
+ " movie_titles = dict(zip(movies['movieId'], movies['title']))\n",
305
+ "\n",
306
+ " similar_ids = find_similar_movies(movie_id, X, k)\n",
307
+ " movie_title = movie_titles.get(movie_id, \"Movie not found\")\n",
308
+ "\n",
309
+ " if movie_title == \"Movie not found\":\n",
310
+ " print(f\"Movie with ID {movie_id} not found.\")\n",
311
+ " return\n",
312
+ "\n",
313
+ " print(f\"Since you watched {movie_title}, you might also like:\")\n",
314
+ " for i in similar_ids:\n",
315
+ " print(movie_titles.get(i, \"Movie not found\"))"
316
+ ],
317
+ "metadata": {
318
+ "id": "PrN_SjhMzHxy"
319
+ },
320
+ "execution_count": null,
321
+ "outputs": []
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "source": [
326
+ "user_id = 150 # Replace with the desired user ID\n",
327
+ "recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10)"
328
+ ],
329
+ "metadata": {
330
+ "colab": {
331
+ "base_uri": "https://localhost:8080/"
332
+ },
333
+ "id": "L13wNuTOzJry",
334
+ "outputId": "5316c18d-8323-4fc8-8ed0-a503edf93f29"
335
+ },
336
+ "execution_count": null,
337
+ "outputs": [
338
+ {
339
+ "output_type": "stream",
340
+ "name": "stdout",
341
+ "text": [
342
+ "Since you watched Twelve Monkeys (a.k.a. 12 Monkeys) (1995), you might also like:\n",
343
+ "Pulp Fiction (1994)\n",
344
+ "Terminator 2: Judgment Day (1991)\n",
345
+ "Independence Day (a.k.a. ID4) (1996)\n",
346
+ "Seven (a.k.a. Se7en) (1995)\n",
347
+ "Fargo (1996)\n",
348
+ "Fugitive, The (1993)\n",
349
+ "Usual Suspects, The (1995)\n",
350
+ "Jurassic Park (1993)\n",
351
+ "Star Wars: Episode IV - A New Hope (1977)\n",
352
+ "Heat (1995)\n"
353
+ ]
354
+ }
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "source": [
360
+ "user_id = 415 # Replace with the desired user ID\n",
361
+ "recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10)"
362
+ ],
363
+ "metadata": {
364
+ "colab": {
365
+ "base_uri": "https://localhost:8080/"
366
+ },
367
+ "id": "pEZ5ISP8zLB1",
368
+ "outputId": "e61b8c3f-db5d-4c49-b876-5bc19a490ce4"
369
+ },
370
+ "execution_count": null,
371
+ "outputs": [
372
+ {
373
+ "output_type": "stream",
374
+ "name": "stdout",
375
+ "text": [
376
+ "Since you watched Pulp Fiction (1994), you might also like:\n",
377
+ "Silence of the Lambs, The (1991)\n",
378
+ "Shawshank Redemption, The (1994)\n",
379
+ "Seven (a.k.a. Se7en) (1995)\n",
380
+ "Forrest Gump (1994)\n",
381
+ "Usual Suspects, The (1995)\n",
382
+ "Braveheart (1995)\n",
383
+ "Fight Club (1999)\n",
384
+ "Fargo (1996)\n",
385
+ "Terminator 2: Judgment Day (1991)\n",
386
+ "Reservoir Dogs (1992)\n"
387
+ ]
388
+ }
389
+ ]
390
+ }
391
+ ]
392
+ }
app.py CHANGED
@@ -1,8 +1,16 @@
1
  #Fast APi Packages
2
- from fastapi import FastAPI,File
3
  from pydantic import BaseModel
4
  import json
5
 
 
 
 
 
 
 
 
 
6
  #SkillExtraction Packages
7
  import psycopg2
8
  import pandas as pd
@@ -39,6 +47,15 @@ nlp = spacy.load("en_core_web_lg")
39
  skill_extractor = SkillExtractor(nlp, SKILL_DB, PhraseMatcher)
40
  model = SentenceTransformer('all-MiniLM-L6-v2')
41
 
 
 
 
 
 
 
 
 
 
42
  @app.get("/")
43
  async def root():
44
  return {"Recommendation":"Recommendation Version 1.00, https://vaibhav84-recommendation.hf.space/redoc , https://vaibhav84-recommendation.hf.space/docs"}
@@ -58,6 +75,31 @@ def UploadJobDescription(CustomerID : str, CustomerPwd: str):
58
  except Exception as e:
59
  return "An error occurred: {e}"
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  @app.post("/UploadJobDescription/")
62
  async def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
63
  try:
@@ -87,6 +129,51 @@ def UpdateSkills(Skills : ClassModals.Modals.UpdateSkillDetails):
87
  def AllSkills():
88
  return (AddSkill.GetSkillDetails())
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  #return JSONResponse(content={"message": "Here's your interdimensional portal." , "mes1":"data2"})
91
  #https://vaibhav84-resumeapi.hf.space/docs
92
  #https://vaibhav84-resumeapi.hf.space/redoc d
 
1
  #Fast APi Packages
2
+ from fastapi import FastAPI,File, HTTPException
3
  from pydantic import BaseModel
4
  import json
5
 
6
+
7
+ from typing import List, Dict, Any
8
+ import pandas as pd
9
+ import numpy as np
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ from scipy import sparse
12
+ from datetime import datetime
13
+
14
  #SkillExtraction Packages
15
  import psycopg2
16
  import pandas as pd
 
47
  skill_extractor = SkillExtractor(nlp, SKILL_DB, PhraseMatcher)
48
  model = SentenceTransformer('all-MiniLM-L6-v2')
49
 
50
# --- Recommendation data, built once at startup ---------------------------
# Transaction history; Purchase_Date is parsed up front so later max()/
# formatting calls operate on real datetimes.
purchase_history = pd.read_excel(
    'datasetsample.xlsx',
    sheet_name='Transaction History',
    parse_dates=['Purchase_Date'],
)

# Customer ids are compared as strings everywhere below.
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)

# Product_Id -> Category lookup used when building response payloads.
product_categories = (
    purchase_history[['Product_Id', 'Category']]
    .drop_duplicates()
    .set_index('Product_Id')['Category']
    .to_dict()
)

# Customer x Product purchase-count matrix (kept sparse) and the
# product-to-product cosine-similarity matrix (items are columns, hence .T).
purchase_counts = purchase_history.groupby(['Customer_Id', 'Product_Id']).size().unstack(fill_value=0)
sparse_purchase_counts = sparse.csr_matrix(purchase_counts)
cosine_similarities = cosine_similarity(sparse_purchase_counts.T)
57
+
58
+
59
  @app.get("/")
60
  async def root():
61
  return {"Recommendation":"Recommendation Version 1.00, https://vaibhav84-recommendation.hf.space/redoc , https://vaibhav84-recommendation.hf.space/docs"}
 
75
  except Exception as e:
76
  return "An error occurred: {e}"
77
 
78
@app.get("/recommendations/{customer_id}")
async def get_recommendations(customer_id: str, n: int = 5):
    """
    Get purchase history and product recommendations for a customer.

    Parameters:
    - customer_id: The ID of the customer
    - n: Number of recommendations to return (default: 5)

    Returns:
    - JSON object containing the customer's purchase history and recommendations

    Raises:
    - HTTPException 404 if the customer ID is unknown or processing fails
    """
    try:
        purchased_items, recommended_items = get_customer_items_and_recommendations(customer_id, n)

        # The helper returns ([], []) for unknown customers instead of raising,
        # which previously produced an empty 200 response; surface it as a 404.
        if not purchased_items and str(customer_id) not in purchase_counts.index:
            raise HTTPException(status_code=404,
                                detail=f"Customer ID not found: {customer_id}")

        return {
            "customer_id": customer_id,
            "purchase_history": purchased_items,
            "recommendations": recommended_items
        }
    except HTTPException:
        # Re-raise our own 404 untouched instead of re-wrapping it below.
        raise
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Error processing customer ID: {customer_id}. {str(e)}")
100
+
101
+
102
+
103
  @app.post("/UploadJobDescription/")
104
  async def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
105
  try:
 
129
  def AllSkills():
130
  return (AddSkill.GetSkillDetails())
131
 
132
+
133
def get_customer_items_and_recommendations(user_id: str, n: int = 5) -> tuple[List[Dict], List[Dict]]:
    """
    Get both purchased items and item-based CF recommendations for a user.

    Parameters:
    - user_id: customer identifier (coerced to str to match the index dtype)
    - n: maximum number of recommendations to return

    Returns:
    - (purchased_items_info, recommended_items_info); both are empty lists
      when the customer has no purchase history.
    """
    user_id = str(user_id)

    if user_id not in purchase_counts.index:
        return [], []

    purchased_items = list(purchase_counts.columns[purchase_counts.loc[user_id] > 0])

    # Summarise each purchased product: category, total spend, last purchase date.
    purchased_items_info = []
    user_purchases = purchase_history[purchase_history['Customer_Id'] == user_id]

    for item in purchased_items:
        item_purchases = user_purchases[user_purchases['Product_Id'] == item]
        total_amount = float(item_purchases['Amount (In Dollars)'].sum())
        last_purchase = pd.to_datetime(item_purchases['Purchase_Date'].max())
        purchased_items_info.append({
            'product_id': item,
            'category': product_categories.get(item, 'Unknown'),
            'total_amount': total_amount,
            'last_purchase': last_purchase.strftime('%Y-%m-%d')
        })

    # Score every product by similarity to this user's purchase vector.
    user_idx = purchase_counts.index.get_loc(user_id)
    user_history = sparse_purchase_counts[user_idx].toarray().flatten()
    similarities = cosine_similarities.dot(user_history)

    # BUGFIX: rank ALL products, drop already-purchased ones, THEN take the
    # top n. The previous order (truncate to n, then filter) could return
    # fewer than n recommendations whenever owned items ranked in the top n.
    ranked_indices = np.argsort(similarities)[::-1]
    recommended_items = [purchase_counts.columns[i]
                         for i in ranked_indices
                         if user_history[i] == 0][:n]

    recommended_items_info = [
        {
            'product_id': item,
            'category': product_categories.get(item, 'Unknown')
        }
        for item in recommended_items
    ]

    return purchased_items_info, recommended_items_info
177
  #return JSONResponse(content={"message": "Here's your interdimensional portal." , "mes1":"data2"})
178
  #https://vaibhav84-resumeapi.hf.space/docs
179
  #https://vaibhav84-resumeapi.hf.space/redoc d
movies.csv ADDED
The diff for this file is too large to render. See raw diff
 
sample.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

# Transaction history, one row per purchase. Purchase_Date is parsed here so
# later max()/strftime calls operate on real datetimes.
purchase_history = pd.read_excel(
    'datasetsample.xlsx',
    sheet_name='Transaction History',
    parse_dates=['Purchase_Date'],
)

# Customer ids are compared as strings throughout this script.
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)

# Product_Id -> Category lookup used when rendering results.
product_categories = (
    purchase_history[['Product_Id', 'Category']]
    .drop_duplicates()
    .set_index('Product_Id')['Category']
    .to_dict()
)

# Customer x Product purchase-count matrix, kept sparse for the similarity step.
purchase_counts = purchase_history.groupby(['Customer_Id', 'Product_Id']).size().unstack(fill_value=0)
sparse_purchase_counts = sparse.csr_matrix(purchase_counts)

# Product-to-product cosine similarity (products are columns, hence the .T).
cosine_similarities = cosine_similarity(sparse_purchase_counts.T)
24
+
25
def get_customer_items_and_recommendations(user_id, n=5):
    """
    Return the purchase history and top-``n`` recommendations for a customer.

    Parameters:
        user_id (str): The customer ID (converted to string internally).
        n (int): Number of recommendation slots to consider.

    Returns:
        tuple: (purchased_items_info, recommended_items_info); both are empty
        lists when the customer is unknown.
    """
    user_id = str(user_id)

    # Unknown customer -> nothing to report.
    if user_id not in purchase_counts.index:
        return [], []

    owned_mask = purchase_counts.loc[user_id] > 0
    purchased_items = list(purchase_counts.columns[owned_mask])

    # Build a summary record per purchased product.
    history_rows = purchase_history[purchase_history['Customer_Id'] == user_id]
    purchased_items_info = []
    for product in purchased_items:
        rows = history_rows[history_rows['Product_Id'] == product]
        purchased_items_info.append({
            'product_id': product,
            'category': product_categories.get(product, 'Unknown'),
            'total_amount': rows['Amount (In Dollars)'].sum(),
            'last_purchase': pd.to_datetime(rows['Purchase_Date'].max()),
        })

    # Score every product by similarity to this user's purchase vector.
    row_position = purchase_counts.index.get_loc(user_id)
    user_vector = sparse_purchase_counts[row_position].toarray().flatten()
    scores = cosine_similarities.dot(user_vector)
    # Zero out items the user already owns so they rank last.
    scores[np.where(user_vector > 0)[0]] = 0

    # Top-n candidates, then drop any owned items that slipped through.
    top_positions = np.argsort(scores)[::-1][:n]
    candidates = list(purchase_counts.columns[top_positions])
    fresh_items = [p for p in candidates if p not in purchased_items]

    recommended_items_info = [
        {'product_id': p, 'category': product_categories.get(p, 'Unknown')}
        for p in fresh_items
    ]

    return purchased_items_info, recommended_items_info
87
+
88
def display_customer_profile(user_id, n=5):
    """
    Print a customer's purchase history and recommendations as text tables.

    Parameters:
        user_id (str): The customer ID.
        n (int): Number of recommendations to request.
    """
    purchases, recommendations = get_customer_items_and_recommendations(user_id, n)

    print(f"\nCustomer Profile for {user_id}")
    print("-" * 70)

    print("\nPurchase History:")
    if not purchases:
        print("No purchase history found")
    else:
        print(f"{'Product ID':<15} {'Category':<20} {'Total Amount':>12} {'Last Purchase'}")
        print("-" * 70)
        for entry in purchases:
            print(f"{entry['product_id']:<15} {entry['category']:<20} "
                  f"${entry['total_amount']:>11.2f} {entry['last_purchase'].strftime('%Y-%m-%d')}")

    print("\nRecommended Items:")
    if not recommendations:
        print("No recommendations available")
    else:
        print(f"{'Product ID':<15} {'Category':<20}")
        print("-" * 35)
        for entry in recommendations:
            print(f"{entry['product_id']:<15} {entry['category']:<20}")

    print("-" * 70)

# Example usage:
customer_id = 'CUST2025A'  # Replace with your actual customer ID
display_customer_profile(customer_id)