Spaces:
Sleeping
Sleeping
# FastAPI packages
from fastapi import FastAPI, File, HTTPException | |
from pydantic import BaseModel | |
import json | |
from typing import List, Dict, Any | |
import pandas as pd | |
import numpy as np | |
from sklearn.metrics.pairwise import cosine_similarity | |
from scipy import sparse | |
from datetime import datetime | |
import warnings | |
import os | |
import logging | |
import requests | |
import io | |
# Suppress every warning issued through the `warnings` module for this process.
warnings.filterwarnings('ignore')
# Set up logging: configure the root logger at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Application object for this service.
app = FastAPI()
# URL of the Excel file
EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
# Seconds to wait for the download before giving up. Without a timeout,
# requests blocks indefinitely on a stalled connection and startup hangs.
EXCEL_DOWNLOAD_TIMEOUT = 30

# Load the dataset once at import time and precompute the tables the request
# handlers read. Any failure is logged and re-raised so the module fails to
# import instead of serving requests without data.
try:
    # Download the file from URL
    logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
    response = requests.get(EXCEL_URL, timeout=EXCEL_DOWNLOAD_TIMEOUT)
    response.raise_for_status()  # Raises an HTTPError if the status is 4xx, 5xx

    # Read the Excel file from the downloaded content
    excel_content = io.BytesIO(response.content)
    purchase_history = pd.read_excel(
        excel_content,
        sheet_name='Transaction History',
        parse_dates=['Purchase_Date'],
    )
    logger.info("Successfully downloaded and loaded Excel file")

    # Process the data
    # Customer ids are compared as strings everywhere else in this module.
    purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)

    # Product_Id -> Category lookup built from the distinct (product, category) pairs.
    product_categories = (
        purchase_history[['Product_Id', 'Category']]
        .drop_duplicates()
        .set_index('Product_Id')['Category']
        .to_dict()
    )

    # Customer x product table of purchase counts (0 where a pair never occurs).
    purchase_counts = (
        purchase_history
        .groupby(['Customer_Id', 'Product_Id'])
        .size()
        .unstack(fill_value=0)
    )
    sparse_purchase_counts = sparse.csr_matrix(purchase_counts)

    # Transposing puts products on the rows, so this is a product x product
    # similarity matrix.
    cosine_similarities = cosine_similarity(sparse_purchase_counts.T)
    logger.info("Data processing completed successfully")
except Exception as e:
    logger.error(f"Error downloading or processing data: {str(e)}")
    raise
def get_customer_items_and_recommendations(user_id: str, n: int = 5) -> tuple[List[Dict], List[Dict]]:
    """
    Get both purchased items and recommendations for a user

    Parameters:
    - user_id: Customer identifier; converted to str before the lookup
    - n: Maximum number of recommendations to return (default: 5);
      values below 1 produce an empty recommendation list

    Returns:
    - Tuple (purchased_items_info, recommended_items_info). Both lists are
      empty when the customer is not present in purchase_counts. Each
      purchased entry holds product_id, category, total_amount and
      last_purchase ('YYYY-MM-DD', or None when no date is available); each
      recommended entry holds product_id and category.
    """
    user_id = str(user_id)
    if user_id not in purchase_counts.index:
        return [], []

    # Products this customer bought at least once, in column order.
    purchased_items = list(purchase_counts.columns[purchase_counts.loc[user_id] > 0])
    user_purchases = purchase_history[purchase_history['Customer_Id'] == user_id]

    purchased_items_info = []
    for item in purchased_items:
        item_purchases = user_purchases[user_purchases['Product_Id'] == item]
        last_purchase = pd.to_datetime(item_purchases['Purchase_Date'].max())
        purchased_items_info.append({
            'product_id': item,
            'category': product_categories.get(item, 'Unknown'),
            'total_amount': float(item_purchases['Amount (In Dollars)'].sum()),
            # A missing date (NaT) cannot be formatted; report None instead of raising.
            'last_purchase': (
                None if pd.isna(last_purchase) else last_purchase.strftime('%Y-%m-%d')
            ),
        })

    # A negative n would otherwise slice "all but the last |n|" candidates.
    n = max(n, 0)

    user_idx = purchase_counts.index.get_loc(user_id)
    user_history = sparse_purchase_counts[user_idx].toarray().ravel()

    # Score each product by its similarity to the products the customer bought,
    # weighted by the purchase counts.
    scores = np.asarray(cosine_similarities.dot(user_history), dtype=float)

    # Push purchased products to the very end of the ranking and drop them
    # BEFORE taking the top n, so they cannot displace valid candidates whose
    # score ties with them.
    already_bought = user_history > 0
    scores[already_bought] = -np.inf

    # Stable sort: ties are resolved by column order, so output is deterministic.
    ranked = np.argsort(-scores, kind='stable')
    ranked = ranked[~already_bought[ranked]][:n]

    recommended_items_info = [
        {
            'product_id': item,
            'category': product_categories.get(item, 'Unknown'),
        }
        for item in purchase_counts.columns[ranked]
    ]
    return purchased_items_info, recommended_items_info
async def root():
    """
    Return a welcome message along with the service and data-load status
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file; confirm it is registered on `app`.
    data_ready = purchase_history is not None
    payload = {
        "message": "Welcome to the Recommendation API",
        "status": "running",
        "data_loaded": data_ready,
    }
    return payload
async def get_recommendations(customer_id: str, n: int = 5):
    """
    Get recommendations for a customer

    Parameters:
    - customer_id: The ID of the customer
    - n: Number of recommendations to return (default: 5)

    Returns:
    - JSON object containing purchase history and recommendations

    Raises:
    - HTTPException (404) when the lookup raises any exception; the original
      error is chained as the cause and logged with its traceback.
    """
    # NOTE(review): every failure is reported as 404, including internal
    # errors; the status code is kept unchanged here for existing clients.
    # Only the lookup can fail, so it is the only statement inside the try.
    try:
        purchased_items, recommended_items = get_customer_items_and_recommendations(customer_id, n)
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Error processing request for customer %s: %s", customer_id, e)
        raise HTTPException(
            status_code=404,
            detail=f"Error processing customer ID: {customer_id}. {str(e)}",
        ) from e

    return {
        "customer_id": customer_id,
        "purchase_history": purchased_items,
        "recommendations": recommended_items,
    }
async def health_check():
    """
    Health check endpoint reporting load status and dataset dimensions
    """
    data_ready = purchase_history is not None
    if data_ready:
        # Rows of purchase_counts are customers, columns are products.
        customer_total = len(purchase_counts.index)
        product_total = len(purchase_counts.columns)
    else:
        customer_total = 0
        product_total = 0

    report = {
        "status": "healthy",
        "data_loaded": data_ready,
        "number_of_customers": customer_total,
        "number_of_products": product_total,
    }
    return report