Vaibhav84 committed on
Commit
0892eee
·
1 Parent(s): 3d4d835
Files changed (1) hide show
  1. app.py +4 -288
app.py CHANGED
@@ -28,6 +28,7 @@ app = FastAPI()
28
 
29
  # URL of the Excel file
30
  EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
 
31
  try:
32
  # Download the file from URL
33
  logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
@@ -36,54 +37,8 @@ try:
36
 
37
  # Read the Excel file from the downloaded content
38
  excel_content = io.BytesIO(response.content)
39
-
40
- def custom_date_parser(date_str):
41
- try:
42
- # First try to parse as MM/DD/YYYY
43
- dt = datetime.strptime(str(date_str), '%m/%d/%Y')
44
- return dt
45
- except ValueError:
46
- try:
47
- # Then try DD/MM/YYYY
48
- dt = datetime.strptime(str(date_str), '%d/%m/%Y')
49
- return dt
50
- except ValueError:
51
- try:
52
- # Then try YYYY-MM-DD
53
- dt = datetime.strptime(str(date_str), '%Y-%m-%d')
54
- return dt
55
- except ValueError:
56
- # If all else fails, try pandas' flexible parser
57
- return pd.to_datetime(date_str, format='mixed', dayfirst=True)
58
-
59
- # Read the Excel file with the custom date parser
60
- purchase_history = pd.read_excel(
61
- excel_content,
62
- sheet_name='Transaction History'
63
- )
64
-
65
- # Convert Purchase_Date column separately with error handling
66
- def safe_parse_date(date_val):
67
- if pd.isna(date_val):
68
- return None
69
- try:
70
- if isinstance(date_val, (datetime, pd.Timestamp)):
71
- return date_val
72
- return custom_date_parser(date_val)
73
- except Exception as e:
74
- logger.error(f"Error parsing date {date_val}: {str(e)}")
75
- return None
76
-
77
- # Convert dates and handle any parsing errors
78
- purchase_history['Purchase_Date'] = purchase_history['Purchase_Date'].apply(safe_parse_date)
79
-
80
- # Remove any rows where date parsing failed
81
- purchase_history = purchase_history.dropna(subset=['Purchase_Date'])
82
-
83
- # Log some information about the dates for debugging
84
- logger.info(f"Date column type: {purchase_history['Purchase_Date'].dtype}")
85
- logger.info(f"Sample dates: {purchase_history['Purchase_Date'].head()}")
86
- logger.info(f"Number of valid dates: {purchase_history['Purchase_Date'].count()}")
87
 
88
  # Read Customer Profile sheet
89
  excel_content.seek(0) # Reset buffer position
@@ -91,18 +46,9 @@ try:
91
 
92
  # Read Social Media Sentiment
93
  excel_content.seek(0) # Reset buffer position
94
- customer_Media = pd.read_excel(
95
- excel_content,
96
- sheet_name='Social Media Sentiment',
97
- parse_dates=['Timestamp']
98
- )
99
 
100
  logger.info("Successfully downloaded and loaded Excel file")
101
-
102
- except Exception as e:
103
- logger.error(f"Error downloading or processing data: {str(e)}")
104
- raise
105
-
106
 
107
  # Process the data
108
  purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
@@ -613,233 +559,3 @@ class FinancialAnalysisResponse(BaseModel):
613
  financial_health_score: int
614
  action_items: List[str]
615
 
616
- @app.get("/financial-analysis/{customer_id}", response_model=FinancialAnalysisResponse)
617
- async def get_financial_analysis(customer_id: str):
618
- try:
619
- # Validate customer
620
- if customer_id not in purchase_history['Customer_Id'].unique():
621
- raise HTTPException(
622
- status_code=status.HTTP_404_NOT_FOUND,
623
- detail="Customer not found"
624
- )
625
-
626
- # Get customer profile and transactions
627
- customer_profile = customer_profiles[customer_profiles['Customer_Id'] == customer_id].iloc[0]
628
- customer_transactions = purchase_history[purchase_history['Customer_Id'] == customer_id].copy()
629
-
630
- # Ensure Purchase_Date is in datetime format
631
- if not pd.api.types.is_datetime64_any_dtype(customer_transactions['Purchase_Date']):
632
- customer_transactions['Purchase_Date'] = pd.to_datetime(
633
- customer_transactions['Purchase_Date'],
634
- format='mixed',
635
- dayfirst=False
636
- )
637
-
638
- # Print debug information
639
- logger.info(f"Date column type: {customer_transactions['Purchase_Date'].dtype}")
640
- logger.info(f"Sample dates: {customer_transactions['Purchase_Date'].head()}")
641
-
642
- # Calculate basic financial metrics
643
- current_date = datetime.now()
644
- thirty_days_ago = current_date - timedelta(days=30)
645
- sixty_days_ago = current_date - timedelta(days=60)
646
-
647
- current_month_spending = customer_transactions[
648
- customer_transactions['Purchase_Date'] >= thirty_days_ago
649
- ]['Amount (In Dollars)'].sum()
650
-
651
- previous_month_spending = customer_transactions[
652
- (customer_transactions['Purchase_Date'] >= sixty_days_ago) &
653
- (customer_transactions['Purchase_Date'] < thirty_days_ago)
654
- ]['Amount (In Dollars)'].sum()
655
-
656
- # Calculate monthly average spending
657
- monthly_groups = customer_transactions.groupby(
658
- customer_transactions['Purchase_Date'].dt.to_period('M')
659
- )['Amount (In Dollars)'].sum()
660
-
661
- monthly_avg_spend = monthly_groups.mean() if not monthly_groups.empty else 0.0
662
-
663
- # Calculate spend trend
664
- spend_trend = ((current_month_spending - previous_month_spending) / previous_month_spending * 100) if previous_month_spending > 0 else 0
665
-
666
- # Calculate spending categories
667
- spending_categories = customer_transactions.groupby('Category')['Amount (In Dollars)'].sum().reset_index()
668
- spending_categories_list = [
669
- {"category": row['Category'], "amount": float(row['Amount (In Dollars)'])}
670
- for _, row in spending_categories.iterrows()
671
- ]
672
-
673
- # Generate spending insights
674
- spending_insights = []
675
-
676
- # Category-based insights
677
- for category in spending_categories.itertuples():
678
- category_avg = category._2 / max(1, len(monthly_groups)) # Using _2 for the Amount column
679
- if category_avg > monthly_avg_spend * 0.3:
680
- spending_insights.append(f"High spending in {category.Category}: ${category_avg:.2f}/month")
681
-
682
- # Age-based recommendations
683
- age = float(customer_profile['Age'])
684
- income = float(customer_profile['Income per year (in dollars)'])
685
-
686
- # Determine credit score range (simulated based on age and income)
687
- base_score = min(max((age * 10 + income / 1000) / 2, 300), 850)
688
- credit_score_range = f"{int(base_score-25)}-{int(base_score+25)}"
689
- credit_score_change = "↑ improving" if age > 25 and income > 50000 else "stable"
690
-
691
- # Calculate investment potential (simplified)
692
- monthly_income = income / 12
693
- investment_potential = max(0, monthly_income - monthly_avg_spend * 1.2)
694
-
695
- # Generate recommendations based on age and income
696
- card_recommendations = []
697
- investment_recommendations = []
698
- real_estate_recommendations = []
699
-
700
- # Credit Card Recommendations
701
- if income < 50000:
702
- card_recommendations.append(CardRecommendation(
703
- name="Cash Back Starter Card",
704
- annual_fee=0,
705
- rewards_rate=1.5,
706
- benefits="No annual fee, 1.5% cash back on all purchases"
707
- ))
708
- elif income < 100000:
709
- card_recommendations.append(CardRecommendation(
710
- name="Premium Rewards Card",
711
- annual_fee=95,
712
- rewards_rate=2.5,
713
- benefits="Travel insurance, cash back on all purchases, airport lounge access"
714
- ))
715
- else:
716
- card_recommendations.append(CardRecommendation(
717
- name="Elite Travel Card",
718
- annual_fee=495,
719
- rewards_rate=3.0,
720
- benefits="Comprehensive travel benefits, concierge service, premium insurance"
721
- ))
722
-
723
- # Investment Recommendations
724
- if age < 30:
725
- investment_recommendations.extend([
726
- InvestmentRecommendation(
727
- type="Index Fund",
728
- risk_level="High",
729
- expected_return=10.0,
730
- min_investment=1000.0
731
- ),
732
- InvestmentRecommendation(
733
- type="Tech Growth ETF",
734
- risk_level="High",
735
- expected_return=12.0,
736
- min_investment=2000.0
737
- )
738
- ])
739
- elif age < 50:
740
- investment_recommendations.extend([
741
- InvestmentRecommendation(
742
- type="Balanced Fund",
743
- risk_level="Medium",
744
- expected_return=8.0,
745
- min_investment=5000.0
746
- ),
747
- InvestmentRecommendation(
748
- type="Dividend Growth Stocks",
749
- risk_level="Medium",
750
- expected_return=7.0,
751
- min_investment=10000.0
752
- )
753
- ])
754
- else:
755
- investment_recommendations.extend([
756
- InvestmentRecommendation(
757
- type="Bond Fund",
758
- risk_level="Low",
759
- expected_return=5.0,
760
- min_investment=10000.0
761
- ),
762
- InvestmentRecommendation(
763
- type="Income Fund",
764
- risk_level="Low",
765
- expected_return=4.0,
766
- min_investment=25000.0
767
- )
768
- ])
769
-
770
- # Real Estate Recommendations
771
- if income > 75000:
772
- mortgage_capacity = (income * 4) * 0.8 # 80% of 4x annual income
773
- monthly_payment = (mortgage_capacity * 0.05) / 12 # Simplified mortgage calculation
774
-
775
- real_estate_recommendations.extend([
776
- RealEstateRecommendation(
777
- type="Starter Home",
778
- location="Suburban Area",
779
- price=mortgage_capacity,
780
- monthly_payment=monthly_payment,
781
- roi_potential=5.0
782
- )
783
- ])
784
-
785
- if income > 150000:
786
- real_estate_recommendations.append(
787
- RealEstateRecommendation(
788
- type="Investment Property",
789
- location="Urban Center",
790
- price=mortgage_capacity * 0.7,
791
- monthly_payment=monthly_payment * 0.7,
792
- roi_potential=8.0
793
- )
794
- )
795
-
796
- # Calculate financial health score
797
- savings_ratio = max(0, min(1, (income - monthly_avg_spend * 12) / income))
798
- diversity_score = len(spending_categories) / 10
799
- stability_score = min(1, len(customer_transactions) / 100)
800
-
801
- financial_health_score = int((savings_ratio * 0.4 + diversity_score * 0.3 + stability_score * 0.3) * 100)
802
-
803
- # Generate action items
804
- action_items = []
805
- if savings_ratio < 0.2:
806
- action_items.append("Increase monthly savings to at least 20% of income")
807
- if monthly_avg_spend > monthly_income * 0.7:
808
- action_items.append("Review monthly expenses to reduce spending")
809
- if len(investment_recommendations) > 0:
810
- action_items.append(f"Consider investing in {investment_recommendations[0].type}")
811
-
812
- # Get recent transactions
813
- recent_transactions = [
814
- {
815
- "date": row['Purchase_Date'].strftime('%Y-%m-%d'),
816
- "amount": float(row['Amount (In Dollars)'])
817
- }
818
- for _, row in customer_transactions.sort_values('Purchase_Date', ascending=False).head(10).iterrows()
819
- ]
820
-
821
- return {
822
- "monthly_avg_spend": float(monthly_avg_spend),
823
- "spend_trend": float(spend_trend),
824
- "credit_score_range": credit_score_range,
825
- "credit_score_change": credit_score_change,
826
- "investment_potential": float(investment_potential),
827
- "spending_categories": spending_categories_list,
828
- "spending_insights": spending_insights,
829
- "recent_transactions": recent_transactions,
830
- "card_recommendations": card_recommendations,
831
- "investment_recommendations": investment_recommendations,
832
- "real_estate_recommendations": real_estate_recommendations,
833
- "financial_health_score": financial_health_score,
834
- "action_items": action_items
835
- }
836
-
837
- except HTTPException:
838
- raise
839
- except Exception as e:
840
- logger.error(f"Error processing financial analysis for customer {customer_id}: {str(e)}")
841
- raise HTTPException(
842
- status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
843
- detail=f"Error processing request: {str(e)}"
844
- )
845
-
 
28
 
29
  # URL of the Excel file
30
  EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
31
+
32
  try:
33
  # Download the file from URL
34
  logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
 
37
 
38
  # Read the Excel file from the downloaded content
39
  excel_content = io.BytesIO(response.content)
40
+ purchase_history = pd.read_excel(excel_content, sheet_name='Transaction History',
41
+ parse_dates=['Purchase_Date'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # Read Customer Profile sheet
44
  excel_content.seek(0) # Reset buffer position
 
46
 
47
  # Read Social Media Sentiment
48
  excel_content.seek(0) # Reset buffer position
49
+ customer_Media = pd.read_excel(excel_content, sheet_name='Social Media Sentiment',parse_dates=['Timestamp'])
 
 
 
 
50
 
51
  logger.info("Successfully downloaded and loaded Excel file")
 
 
 
 
 
52
 
53
  # Process the data
54
  purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
 
559
  financial_health_score: int
560
  action_items: List[str]
561