RecommendationAPI

Sleeping

App Files Community

Vaibhav84 committed on Mar 22

Commit

8551e3d

1 Parent(s): a3d4e2f

api

Browse files

Files changed (1) hide show

app.py +53 -15

app.py CHANGED Viewed

@@ -28,7 +28,6 @@ app = FastAPI()
 # URL of the Excel file
 EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
 try:
     # Download the file from URL
     logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
@@ -37,34 +36,73 @@ try:
     # Read the Excel file from the downloaded content
     excel_content = io.BytesIO(response.content)
     def custom_date_parser(date_str):
         try:
-            return pd.to_datetime(date_str, format='%m/%d/%Y')
-        except:
             try:
-                return pd.to_datetime(date_str, format='%Y-%m-%d')
-            except:
-                return pd.to_datetime(date_str, format='mixed', dayfirst=False)
-    purchase_history = pd.read_excel(excel_content, sheet_name='Transaction History',
-                                   parse_dates=['Purchase_Date'],date_parser=custom_date_parser)
-     # Ensure Purchase_Date is datetime
-    purchase_history['Purchase_Date'] = pd.to_datetime(
-        purchase_history['Purchase_Date'],
-        format='mixed',
-        dayfirst=False
     )
     # Read Customer Profile sheet
     excel_content.seek(0)  # Reset buffer position
     customer_profiles = pd.read_excel(excel_content, sheet_name='Customer Profile (Individual)')
     # Read Social Media Sentiment
     excel_content.seek(0)  # Reset buffer position
-    customer_Media = pd.read_excel(excel_content, sheet_name='Social Media Sentiment',parse_dates=['Timestamp'])
     logger.info("Successfully downloaded and loaded Excel file")
     # Process the data
     purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)

 # URL of the Excel file
 EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
 try:
     # Download the file from URL
     logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
     # Read the Excel file from the downloaded content
     excel_content = io.BytesIO(response.content)
     def custom_date_parser(date_str):
         try:
+            # First try to parse as MM/DD/YYYY
+            dt = datetime.strptime(str(date_str), '%m/%d/%Y')
+            return dt
+        except ValueError:
             try:
+                # Then try DD/MM/YYYY
+                dt = datetime.strptime(str(date_str), '%d/%m/%Y')
+                return dt
+            except ValueError:
+                try:
+                    # Then try YYYY-MM-DD
+                    dt = datetime.strptime(str(date_str), '%Y-%m-%d')
+                    return dt
+                except ValueError:
+                    # If all else fails, try pandas' flexible parser
+                    return pd.to_datetime(date_str, format='mixed', dayfirst=True)
+    # Read the Transaction History sheet; Purchase_Date is parsed separately below
+    purchase_history = pd.read_excel(
+        excel_content,
+        sheet_name='Transaction History'
     )
+    # Convert Purchase_Date column separately with error handling
+    def safe_parse_date(date_val):
+        if pd.isna(date_val):
+            return None
+        try:
+            if isinstance(date_val, (datetime, pd.Timestamp)):
+                return date_val
+            return custom_date_parser(date_val)
+        except Exception as e:
+            logger.error(f"Error parsing date {date_val}: {str(e)}")
+            return None
+    # Convert dates and handle any parsing errors
+    purchase_history['Purchase_Date'] = purchase_history['Purchase_Date'].apply(safe_parse_date)
+    # Remove any rows where date parsing failed
+    purchase_history = purchase_history.dropna(subset=['Purchase_Date'])
+    # Log some information about the dates for debugging
+    logger.info(f"Date column type: {purchase_history['Purchase_Date'].dtype}")
+    logger.info(f"Sample dates: {purchase_history['Purchase_Date'].head()}")
+    logger.info(f"Number of valid dates: {purchase_history['Purchase_Date'].count()}")
     # Read Customer Profile sheet
     excel_content.seek(0)  # Reset buffer position
     customer_profiles = pd.read_excel(excel_content, sheet_name='Customer Profile (Individual)')
     # Read Social Media Sentiment
     excel_content.seek(0)  # Reset buffer position
+    customer_Media = pd.read_excel(
+        excel_content,
+        sheet_name='Social Media Sentiment',
+        parse_dates=['Timestamp']
+    )
     logger.info("Successfully downloaded and loaded Excel file")
+except Exception as e:
+    logger.error(f"Error downloading or processing data: {str(e)}")
+    raise
     # Process the data
     purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)