Spaces:
Sleeping
Sleeping
api
Browse files
app.py
CHANGED
@@ -28,6 +28,7 @@ app = FastAPI()
|
|
28 |
|
29 |
# URL of the Excel file
|
30 |
EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
|
|
|
31 |
try:
|
32 |
# Download the file from URL
|
33 |
logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
|
@@ -36,54 +37,8 @@ try:
|
|
36 |
|
37 |
# Read the Excel file from the downloaded content
|
38 |
excel_content = io.BytesIO(response.content)
|
39 |
-
|
40 |
-
|
41 |
-
try:
|
42 |
-
# First try to parse as MM/DD/YYYY
|
43 |
-
dt = datetime.strptime(str(date_str), '%m/%d/%Y')
|
44 |
-
return dt
|
45 |
-
except ValueError:
|
46 |
-
try:
|
47 |
-
# Then try DD/MM/YYYY
|
48 |
-
dt = datetime.strptime(str(date_str), '%d/%m/%Y')
|
49 |
-
return dt
|
50 |
-
except ValueError:
|
51 |
-
try:
|
52 |
-
# Then try YYYY-MM-DD
|
53 |
-
dt = datetime.strptime(str(date_str), '%Y-%m-%d')
|
54 |
-
return dt
|
55 |
-
except ValueError:
|
56 |
-
# If all else fails, try pandas' flexible parser
|
57 |
-
return pd.to_datetime(date_str, format='mixed', dayfirst=True)
|
58 |
-
|
59 |
-
# Read the Excel file with the custom date parser
|
60 |
-
purchase_history = pd.read_excel(
|
61 |
-
excel_content,
|
62 |
-
sheet_name='Transaction History'
|
63 |
-
)
|
64 |
-
|
65 |
-
# Convert Purchase_Date column separately with error handling
|
66 |
-
def safe_parse_date(date_val):
|
67 |
-
if pd.isna(date_val):
|
68 |
-
return None
|
69 |
-
try:
|
70 |
-
if isinstance(date_val, (datetime, pd.Timestamp)):
|
71 |
-
return date_val
|
72 |
-
return custom_date_parser(date_val)
|
73 |
-
except Exception as e:
|
74 |
-
logger.error(f"Error parsing date {date_val}: {str(e)}")
|
75 |
-
return None
|
76 |
-
|
77 |
-
# Convert dates and handle any parsing errors
|
78 |
-
purchase_history['Purchase_Date'] = purchase_history['Purchase_Date'].apply(safe_parse_date)
|
79 |
-
|
80 |
-
# Remove any rows where date parsing failed
|
81 |
-
purchase_history = purchase_history.dropna(subset=['Purchase_Date'])
|
82 |
-
|
83 |
-
# Log some information about the dates for debugging
|
84 |
-
logger.info(f"Date column type: {purchase_history['Purchase_Date'].dtype}")
|
85 |
-
logger.info(f"Sample dates: {purchase_history['Purchase_Date'].head()}")
|
86 |
-
logger.info(f"Number of valid dates: {purchase_history['Purchase_Date'].count()}")
|
87 |
|
88 |
# Read Customer Profile sheet
|
89 |
excel_content.seek(0) # Reset buffer position
|
@@ -91,18 +46,9 @@ try:
|
|
91 |
|
92 |
# Read Social Media Sentiment
|
93 |
excel_content.seek(0) # Reset buffer position
|
94 |
-
customer_Media = pd.read_excel(
|
95 |
-
excel_content,
|
96 |
-
sheet_name='Social Media Sentiment',
|
97 |
-
parse_dates=['Timestamp']
|
98 |
-
)
|
99 |
|
100 |
logger.info("Successfully downloaded and loaded Excel file")
|
101 |
-
|
102 |
-
except Exception as e:
|
103 |
-
logger.error(f"Error downloading or processing data: {str(e)}")
|
104 |
-
raise
|
105 |
-
|
106 |
|
107 |
# Process the data
|
108 |
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
|
@@ -613,233 +559,3 @@ class FinancialAnalysisResponse(BaseModel):
|
|
613 |
financial_health_score: int
|
614 |
action_items: List[str]
|
615 |
|
616 |
-
@app.get("/financial-analysis/{customer_id}", response_model=FinancialAnalysisResponse)
|
617 |
-
async def get_financial_analysis(customer_id: str):
|
618 |
-
try:
|
619 |
-
# Validate customer
|
620 |
-
if customer_id not in purchase_history['Customer_Id'].unique():
|
621 |
-
raise HTTPException(
|
622 |
-
status_code=status.HTTP_404_NOT_FOUND,
|
623 |
-
detail="Customer not found"
|
624 |
-
)
|
625 |
-
|
626 |
-
# Get customer profile and transactions
|
627 |
-
customer_profile = customer_profiles[customer_profiles['Customer_Id'] == customer_id].iloc[0]
|
628 |
-
customer_transactions = purchase_history[purchase_history['Customer_Id'] == customer_id].copy()
|
629 |
-
|
630 |
-
# Ensure Purchase_Date is in datetime format
|
631 |
-
if not pd.api.types.is_datetime64_any_dtype(customer_transactions['Purchase_Date']):
|
632 |
-
customer_transactions['Purchase_Date'] = pd.to_datetime(
|
633 |
-
customer_transactions['Purchase_Date'],
|
634 |
-
format='mixed',
|
635 |
-
dayfirst=False
|
636 |
-
)
|
637 |
-
|
638 |
-
# Print debug information
|
639 |
-
logger.info(f"Date column type: {customer_transactions['Purchase_Date'].dtype}")
|
640 |
-
logger.info(f"Sample dates: {customer_transactions['Purchase_Date'].head()}")
|
641 |
-
|
642 |
-
# Calculate basic financial metrics
|
643 |
-
current_date = datetime.now()
|
644 |
-
thirty_days_ago = current_date - timedelta(days=30)
|
645 |
-
sixty_days_ago = current_date - timedelta(days=60)
|
646 |
-
|
647 |
-
current_month_spending = customer_transactions[
|
648 |
-
customer_transactions['Purchase_Date'] >= thirty_days_ago
|
649 |
-
]['Amount (In Dollars)'].sum()
|
650 |
-
|
651 |
-
previous_month_spending = customer_transactions[
|
652 |
-
(customer_transactions['Purchase_Date'] >= sixty_days_ago) &
|
653 |
-
(customer_transactions['Purchase_Date'] < thirty_days_ago)
|
654 |
-
]['Amount (In Dollars)'].sum()
|
655 |
-
|
656 |
-
# Calculate monthly average spending
|
657 |
-
monthly_groups = customer_transactions.groupby(
|
658 |
-
customer_transactions['Purchase_Date'].dt.to_period('M')
|
659 |
-
)['Amount (In Dollars)'].sum()
|
660 |
-
|
661 |
-
monthly_avg_spend = monthly_groups.mean() if not monthly_groups.empty else 0.0
|
662 |
-
|
663 |
-
# Calculate spend trend
|
664 |
-
spend_trend = ((current_month_spending - previous_month_spending) / previous_month_spending * 100) if previous_month_spending > 0 else 0
|
665 |
-
|
666 |
-
# Calculate spending categories
|
667 |
-
spending_categories = customer_transactions.groupby('Category')['Amount (In Dollars)'].sum().reset_index()
|
668 |
-
spending_categories_list = [
|
669 |
-
{"category": row['Category'], "amount": float(row['Amount (In Dollars)'])}
|
670 |
-
for _, row in spending_categories.iterrows()
|
671 |
-
]
|
672 |
-
|
673 |
-
# Generate spending insights
|
674 |
-
spending_insights = []
|
675 |
-
|
676 |
-
# Category-based insights
|
677 |
-
for category in spending_categories.itertuples():
|
678 |
-
category_avg = category._2 / max(1, len(monthly_groups)) # Using _2 for the Amount column
|
679 |
-
if category_avg > monthly_avg_spend * 0.3:
|
680 |
-
spending_insights.append(f"High spending in {category.Category}: ${category_avg:.2f}/month")
|
681 |
-
|
682 |
-
# Age-based recommendations
|
683 |
-
age = float(customer_profile['Age'])
|
684 |
-
income = float(customer_profile['Income per year (in dollars)'])
|
685 |
-
|
686 |
-
# Determine credit score range (simulated based on age and income)
|
687 |
-
base_score = min(max((age * 10 + income / 1000) / 2, 300), 850)
|
688 |
-
credit_score_range = f"{int(base_score-25)}-{int(base_score+25)}"
|
689 |
-
credit_score_change = "↑ improving" if age > 25 and income > 50000 else "stable"
|
690 |
-
|
691 |
-
# Calculate investment potential (simplified)
|
692 |
-
monthly_income = income / 12
|
693 |
-
investment_potential = max(0, monthly_income - monthly_avg_spend * 1.2)
|
694 |
-
|
695 |
-
# Generate recommendations based on age and income
|
696 |
-
card_recommendations = []
|
697 |
-
investment_recommendations = []
|
698 |
-
real_estate_recommendations = []
|
699 |
-
|
700 |
-
# Credit Card Recommendations
|
701 |
-
if income < 50000:
|
702 |
-
card_recommendations.append(CardRecommendation(
|
703 |
-
name="Cash Back Starter Card",
|
704 |
-
annual_fee=0,
|
705 |
-
rewards_rate=1.5,
|
706 |
-
benefits="No annual fee, 1.5% cash back on all purchases"
|
707 |
-
))
|
708 |
-
elif income < 100000:
|
709 |
-
card_recommendations.append(CardRecommendation(
|
710 |
-
name="Premium Rewards Card",
|
711 |
-
annual_fee=95,
|
712 |
-
rewards_rate=2.5,
|
713 |
-
benefits="Travel insurance, cash back on all purchases, airport lounge access"
|
714 |
-
))
|
715 |
-
else:
|
716 |
-
card_recommendations.append(CardRecommendation(
|
717 |
-
name="Elite Travel Card",
|
718 |
-
annual_fee=495,
|
719 |
-
rewards_rate=3.0,
|
720 |
-
benefits="Comprehensive travel benefits, concierge service, premium insurance"
|
721 |
-
))
|
722 |
-
|
723 |
-
# Investment Recommendations
|
724 |
-
if age < 30:
|
725 |
-
investment_recommendations.extend([
|
726 |
-
InvestmentRecommendation(
|
727 |
-
type="Index Fund",
|
728 |
-
risk_level="High",
|
729 |
-
expected_return=10.0,
|
730 |
-
min_investment=1000.0
|
731 |
-
),
|
732 |
-
InvestmentRecommendation(
|
733 |
-
type="Tech Growth ETF",
|
734 |
-
risk_level="High",
|
735 |
-
expected_return=12.0,
|
736 |
-
min_investment=2000.0
|
737 |
-
)
|
738 |
-
])
|
739 |
-
elif age < 50:
|
740 |
-
investment_recommendations.extend([
|
741 |
-
InvestmentRecommendation(
|
742 |
-
type="Balanced Fund",
|
743 |
-
risk_level="Medium",
|
744 |
-
expected_return=8.0,
|
745 |
-
min_investment=5000.0
|
746 |
-
),
|
747 |
-
InvestmentRecommendation(
|
748 |
-
type="Dividend Growth Stocks",
|
749 |
-
risk_level="Medium",
|
750 |
-
expected_return=7.0,
|
751 |
-
min_investment=10000.0
|
752 |
-
)
|
753 |
-
])
|
754 |
-
else:
|
755 |
-
investment_recommendations.extend([
|
756 |
-
InvestmentRecommendation(
|
757 |
-
type="Bond Fund",
|
758 |
-
risk_level="Low",
|
759 |
-
expected_return=5.0,
|
760 |
-
min_investment=10000.0
|
761 |
-
),
|
762 |
-
InvestmentRecommendation(
|
763 |
-
type="Income Fund",
|
764 |
-
risk_level="Low",
|
765 |
-
expected_return=4.0,
|
766 |
-
min_investment=25000.0
|
767 |
-
)
|
768 |
-
])
|
769 |
-
|
770 |
-
# Real Estate Recommendations
|
771 |
-
if income > 75000:
|
772 |
-
mortgage_capacity = (income * 4) * 0.8 # 80% of 4x annual income
|
773 |
-
monthly_payment = (mortgage_capacity * 0.05) / 12 # Simplified mortgage calculation
|
774 |
-
|
775 |
-
real_estate_recommendations.extend([
|
776 |
-
RealEstateRecommendation(
|
777 |
-
type="Starter Home",
|
778 |
-
location="Suburban Area",
|
779 |
-
price=mortgage_capacity,
|
780 |
-
monthly_payment=monthly_payment,
|
781 |
-
roi_potential=5.0
|
782 |
-
)
|
783 |
-
])
|
784 |
-
|
785 |
-
if income > 150000:
|
786 |
-
real_estate_recommendations.append(
|
787 |
-
RealEstateRecommendation(
|
788 |
-
type="Investment Property",
|
789 |
-
location="Urban Center",
|
790 |
-
price=mortgage_capacity * 0.7,
|
791 |
-
monthly_payment=monthly_payment * 0.7,
|
792 |
-
roi_potential=8.0
|
793 |
-
)
|
794 |
-
)
|
795 |
-
|
796 |
-
# Calculate financial health score
|
797 |
-
savings_ratio = max(0, min(1, (income - monthly_avg_spend * 12) / income))
|
798 |
-
diversity_score = len(spending_categories) / 10
|
799 |
-
stability_score = min(1, len(customer_transactions) / 100)
|
800 |
-
|
801 |
-
financial_health_score = int((savings_ratio * 0.4 + diversity_score * 0.3 + stability_score * 0.3) * 100)
|
802 |
-
|
803 |
-
# Generate action items
|
804 |
-
action_items = []
|
805 |
-
if savings_ratio < 0.2:
|
806 |
-
action_items.append("Increase monthly savings to at least 20% of income")
|
807 |
-
if monthly_avg_spend > monthly_income * 0.7:
|
808 |
-
action_items.append("Review monthly expenses to reduce spending")
|
809 |
-
if len(investment_recommendations) > 0:
|
810 |
-
action_items.append(f"Consider investing in {investment_recommendations[0].type}")
|
811 |
-
|
812 |
-
# Get recent transactions
|
813 |
-
recent_transactions = [
|
814 |
-
{
|
815 |
-
"date": row['Purchase_Date'].strftime('%Y-%m-%d'),
|
816 |
-
"amount": float(row['Amount (In Dollars)'])
|
817 |
-
}
|
818 |
-
for _, row in customer_transactions.sort_values('Purchase_Date', ascending=False).head(10).iterrows()
|
819 |
-
]
|
820 |
-
|
821 |
-
return {
|
822 |
-
"monthly_avg_spend": float(monthly_avg_spend),
|
823 |
-
"spend_trend": float(spend_trend),
|
824 |
-
"credit_score_range": credit_score_range,
|
825 |
-
"credit_score_change": credit_score_change,
|
826 |
-
"investment_potential": float(investment_potential),
|
827 |
-
"spending_categories": spending_categories_list,
|
828 |
-
"spending_insights": spending_insights,
|
829 |
-
"recent_transactions": recent_transactions,
|
830 |
-
"card_recommendations": card_recommendations,
|
831 |
-
"investment_recommendations": investment_recommendations,
|
832 |
-
"real_estate_recommendations": real_estate_recommendations,
|
833 |
-
"financial_health_score": financial_health_score,
|
834 |
-
"action_items": action_items
|
835 |
-
}
|
836 |
-
|
837 |
-
except HTTPException:
|
838 |
-
raise
|
839 |
-
except Exception as e:
|
840 |
-
logger.error(f"Error processing financial analysis for customer {customer_id}: {str(e)}")
|
841 |
-
raise HTTPException(
|
842 |
-
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
843 |
-
detail=f"Error processing request: {str(e)}"
|
844 |
-
)
|
845 |
-
|
|
|
28 |
|
29 |
# URL of the Excel file
|
30 |
EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
|
31 |
+
|
32 |
try:
|
33 |
# Download the file from URL
|
34 |
logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
|
|
|
37 |
|
38 |
# Read the Excel file from the downloaded content
|
39 |
excel_content = io.BytesIO(response.content)
|
40 |
+
purchase_history = pd.read_excel(excel_content, sheet_name='Transaction History',
|
41 |
+
parse_dates=['Purchase_Date'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
# Read Customer Profile sheet
|
44 |
excel_content.seek(0) # Reset buffer position
|
|
|
46 |
|
47 |
# Read Social Media Sentiment
|
48 |
excel_content.seek(0) # Reset buffer position
|
49 |
+
customer_Media = pd.read_excel(excel_content, sheet_name='Social Media Sentiment',parse_dates=['Timestamp'])
|
|
|
|
|
|
|
|
|
50 |
|
51 |
logger.info("Successfully downloaded and loaded Excel file")
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
# Process the data
|
54 |
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
|
|
|
559 |
financial_health_score: int
|
560 |
action_items: List[str]
|
561 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|