Spaces:
Sleeping
Sleeping
api
Browse files
app.py
CHANGED
@@ -28,7 +28,6 @@ app = FastAPI()
|
|
28 |
|
29 |
# URL of the Excel file
|
30 |
EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
|
31 |
-
|
32 |
try:
|
33 |
# Download the file from URL
|
34 |
logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
|
@@ -37,34 +36,73 @@ try:
|
|
37 |
|
38 |
# Read the Excel file from the downloaded content
|
39 |
excel_content = io.BytesIO(response.content)
|
|
|
40 |
def custom_date_parser(date_str):
|
41 |
try:
|
42 |
-
|
43 |
-
|
|
|
|
|
44 |
try:
|
45 |
-
|
46 |
-
|
47 |
-
return
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
|
53 |
-
purchase_history
|
54 |
-
|
55 |
-
|
56 |
-
dayfirst=False
|
57 |
)
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
# Read Customer Profile sheet
|
60 |
excel_content.seek(0) # Reset buffer position
|
61 |
customer_profiles = pd.read_excel(excel_content, sheet_name='Customer Profile (Individual)')
|
62 |
|
63 |
# Read Social Media Sentiment
|
64 |
excel_content.seek(0) # Reset buffer position
|
65 |
-
customer_Media = pd.read_excel(
|
|
|
|
|
|
|
|
|
66 |
|
67 |
logger.info("Successfully downloaded and loaded Excel file")
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
# Process the data
|
70 |
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
|
|
|
28 |
|
29 |
# URL of the Excel file
|
30 |
EXCEL_URL = "https://huggingface.co/spaces/Vaibhav84/RecommendationAPI/resolve/main/DataSetSample.xlsx"
|
|
|
31 |
try:
|
32 |
# Download the file from URL
|
33 |
logger.info(f"Attempting to download Excel file from: {EXCEL_URL}")
|
|
|
36 |
|
37 |
# Read the Excel file from the downloaded content
|
38 |
excel_content = io.BytesIO(response.content)
|
39 |
+
|
40 |
def custom_date_parser(date_str):
    """Parse a single date value by trying a fixed list of formats in order.

    The value is converted with ``str()`` and stripped of surrounding
    whitespace, then matched against ``MM/DD/YYYY``, ``DD/MM/YYYY`` and
    ``YYYY-MM-DD``; the first format that matches wins. Month-first is
    tried before day-first, so an ambiguous value such as ``01/02/2023``
    is read as January 2nd.

    Args:
        date_str: The value to parse; anything ``str()`` can render.

    Returns:
        A ``datetime`` when one of the explicit formats matches, otherwise
        whatever ``pd.to_datetime(..., format='mixed', dayfirst=True)``
        returns for the original value.

    Raises:
        Whatever ``pd.to_datetime`` raises when the fallback cannot parse
        the value either.
    """
    # Strip first: strptime rejects leading/trailing whitespace, which would
    # otherwise push a padded MM/DD/YYYY string into the day-first fallback
    # and silently swap its day and month.
    text = str(date_str).strip()

    for fmt in ('%m/%d/%Y', '%d/%m/%Y', '%Y-%m-%d'):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue

    # If all else fails, try pandas' flexible parser
    return pd.to_datetime(date_str, format='mixed', dayfirst=True)
|
58 |
|
59 |
+
# Read the Excel file with the custom date parser
|
60 |
+
purchase_history = pd.read_excel(
|
61 |
+
excel_content,
|
62 |
+
sheet_name='Transaction History'
|
|
|
63 |
)
|
64 |
|
65 |
+
# Convert Purchase_Date column separately with error handling
|
66 |
+
def safe_parse_date(date_val):
    """Turn one cell value into a date, or ``None`` when that is not possible.

    Missing values (per ``pd.isna``) yield ``None``. Values that are already
    ``datetime`` / ``pd.Timestamp`` instances are returned untouched. Anything
    else is handed to ``custom_date_parser``; if that raises, the error is
    logged and ``None`` is returned instead of propagating.
    """
    # A missing cell carries no date at all.
    if pd.isna(date_val):
        return None

    try:
        already_a_date = isinstance(date_val, (datetime, pd.Timestamp))
        parsed = date_val if already_a_date else custom_date_parser(date_val)
    except Exception as e:
        # Best-effort conversion: record the failure and let the caller
        # treat the value as missing.
        logger.error(f"Error parsing date {date_val}: {str(e)}")
        return None

    return parsed
|
76 |
+
|
77 |
+
# Convert dates and handle any parsing errors
|
78 |
+
purchase_history['Purchase_Date'] = purchase_history['Purchase_Date'].apply(safe_parse_date)
|
79 |
+
|
80 |
+
# Remove any rows where date parsing failed
|
81 |
+
purchase_history = purchase_history.dropna(subset=['Purchase_Date'])
|
82 |
+
|
83 |
+
# Log some information about the dates for debugging
|
84 |
+
logger.info(f"Date column type: {purchase_history['Purchase_Date'].dtype}")
|
85 |
+
logger.info(f"Sample dates: {purchase_history['Purchase_Date'].head()}")
|
86 |
+
logger.info(f"Number of valid dates: {purchase_history['Purchase_Date'].count()}")
|
87 |
+
|
88 |
# Read Customer Profile sheet
|
89 |
excel_content.seek(0) # Reset buffer position
|
90 |
customer_profiles = pd.read_excel(excel_content, sheet_name='Customer Profile (Individual)')
|
91 |
|
92 |
# Read Social Media Sentiment
|
93 |
excel_content.seek(0) # Reset buffer position
|
94 |
+
customer_Media = pd.read_excel(
|
95 |
+
excel_content,
|
96 |
+
sheet_name='Social Media Sentiment',
|
97 |
+
parse_dates=['Timestamp']
|
98 |
+
)
|
99 |
|
100 |
logger.info("Successfully downloaded and loaded Excel file")
|
101 |
+
|
102 |
+
except Exception as e:
|
103 |
+
logger.error(f"Error downloading or processing data: {str(e)}")
|
104 |
+
raise
|
105 |
+
|
106 |
|
107 |
# Process the data
|
108 |
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)
|