Vaibhav84 committed on
Commit
8551e3d
·
1 Parent(s): a3d4e2f
Files changed (1) hide show
  1. app.py +53 -15
app.py CHANGED
@@ -28,7 +28,6 @@ app = FastAPI()
28
 
29
  # URL of the Excel file
30
  EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
31
-
32
  try:
33
  # Download the file from URL
34
  logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
@@ -37,34 +36,73 @@ try:
37
 
38
  # Read the Excel file from the downloaded content
39
  excel_content = io.BytesIO(response.content)
 
40
  def custom_date_parser(date_str):
41
  try:
42
- return pd.to_datetime(date_str, format='%m/%d/%Y')
43
- except:
 
 
44
  try:
45
- return pd.to_datetime(date_str, format='%Y-%m-%d')
46
- except:
47
- return pd.to_datetime(date_str, format='mixed', dayfirst=False)
48
-
49
- purchase_history = pd.read_excel(excel_content, sheet_name='Transaction History',
50
- parse_dates=['Purchase_Date'],date_parser=custom_date_parser)
 
 
 
 
 
51
 
52
- # Ensure Purchase_Date is datetime
53
- purchase_history['Purchase_Date'] = pd.to_datetime(
54
- purchase_history['Purchase_Date'],
55
- format='mixed',
56
- dayfirst=False
57
  )
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # Read Customer Profile sheet
60
  excel_content.seek(0) # Reset buffer position
61
  customer_profiles = pd.read_excel(excel_content, sheet_name='Customer Profile (Individual)')
62
 
63
  # Read Social Media Sentiment
64
  excel_content.seek(0) # Reset buffer position
65
- customer_Media = pd.read_excel(excel_content, sheet_name='Social Media Sentiment',parse_dates=['Timestamp'])
 
 
 
 
66
 
67
  logger.info("Successfully downloaded and loaded Excel file")
 
 
 
 
 
68
 
69
  # Process the data
70
  purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
 
28
 
29
  # URL of the Excel file
30
  EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
 
31
  try:
32
  # Download the file from URL
33
  logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
 
36
 
37
  # Read the Excel file from the downloaded content
38
  excel_content = io.BytesIO(response.content)
39
+
40
  def custom_date_parser(date_str):
41
  try:
42
+ # First try to parse as MM/DD/YYYY
43
+ dt = datetime.strptime(str(date_str), '%m/%d/%Y')
44
+ return dt
45
+ except ValueError:
46
  try:
47
+ # Then try DD/MM/YYYY
48
+ dt = datetime.strptime(str(date_str), '%d/%m/%Y')
49
+ return dt
50
+ except ValueError:
51
+ try:
52
+ # Then try YYYY-MM-DD
53
+ dt = datetime.strptime(str(date_str), '%Y-%m-%d')
54
+ return dt
55
+ except ValueError:
56
+ # If all else fails, try pandas' flexible parser
57
+ return pd.to_datetime(date_str, format='mixed', dayfirst=True)
58
 
59
+ # Read the Transaction History sheet; Purchase_Date is converted separately below
60
+ purchase_history = pd.read_excel(
61
+ excel_content,
62
+ sheet_name='Transaction History'
 
63
  )
64
 
65
+ # Convert Purchase_Date column separately with error handling
66
def safe_parse_date(date_val):
    """Convert one cell value to a date, returning None when it cannot be parsed.

    Missing values (as reported by ``pd.isna``) yield ``None``. Values that
    are already ``datetime`` / ``pd.Timestamp`` instances are returned
    unchanged. Anything else is handed to ``custom_date_parser``; if that
    raises, the error is logged and ``None`` is returned instead of
    propagating the exception.

    Args:
        date_val: Raw value to convert.

    Returns:
        The parsed date, the original value if it was already a date
        object, or ``None`` for missing or unparseable input.
    """
    if pd.isna(date_val):
        return None
    # Already a date object: nothing to parse.
    if isinstance(date_val, (datetime, pd.Timestamp)):
        return date_val
    try:
        parsed = custom_date_parser(date_val)
    except Exception as e:
        # Best effort: record the failure and let the caller drop the row.
        logger.error(f"Error parsing date {date_val}: {str(e)}")
        parsed = None
    return parsed
76
+
77
+ # Convert dates and handle any parsing errors
78
+ purchase_history['Purchase_Date'] = purchase_history['Purchase_Date'].apply(safe_parse_date)
79
+
80
+ # Remove any rows where date parsing failed
81
+ purchase_history = purchase_history.dropna(subset=['Purchase_Date'])
82
+
83
+ # Log some information about the dates for debugging
84
+ logger.info(f"Date column type: {purchase_history['Purchase_Date'].dtype}")
85
+ logger.info(f"Sample dates: {purchase_history['Purchase_Date'].head()}")
86
+ logger.info(f"Number of valid dates: {purchase_history['Purchase_Date'].count()}")
87
+
88
  # Read Customer Profile sheet
89
  excel_content.seek(0) # Reset buffer position
90
  customer_profiles = pd.read_excel(excel_content, sheet_name='Customer Profile (Individual)')
91
 
92
  # Read Social Media Sentiment
93
  excel_content.seek(0) # Reset buffer position
94
+ customer_Media = pd.read_excel(
95
+ excel_content,
96
+ sheet_name='Social Media Sentiment',
97
+ parse_dates=['Timestamp']
98
+ )
99
 
100
  logger.info("Successfully downloaded and loaded Excel file")
101
+
102
+ except Exception as e:
103
+ logger.error(f"Error downloading or processing data: {str(e)}")
104
+ raise
105
+
106
 
107
  # Process the data
108
  purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)