Update main.py
Browse files
main.py
CHANGED
@@ -78,55 +78,73 @@ def generate_recommendations_for_session(session_id):
|
|
78 |
# Convert session data to a DataFrame
|
79 |
raw_df = pd.DataFrame(session_data)
|
80 |
|
81 |
-
#
|
82 |
-
|
83 |
-
presence=('action', 'size'),
|
84 |
-
total_duration=('duration', 'sum')
|
85 |
-
).reset_index()
|
86 |
|
87 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
pivot_df = aggregated_data.pivot_table(
|
89 |
index=['id'],
|
90 |
columns='action',
|
91 |
-
values=
|
92 |
fill_value=0
|
93 |
)
|
94 |
-
|
95 |
# Flatten column names
|
96 |
pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
|
97 |
-
|
98 |
# Ensure all expected columns exist in the pivot table
|
99 |
for col in ALL_COLUMNS:
|
100 |
if f'presence_{col}' not in pivot_df.columns and col != 'time_spent':
|
101 |
pivot_df[f'presence_{col}'] = 0
|
102 |
-
elif col == 'time_spent' and 'total_duration_time_spent' not in pivot_df.columns:
|
103 |
pivot_df['total_duration_time_spent'] = 0
|
104 |
-
|
105 |
# Calculate interaction score for each row
|
106 |
pivot_df['interaction_score'] = pivot_df.apply(calculate_interaction_score, axis=1)
|
107 |
-
|
108 |
# Create a user vector based on the interaction scores
|
109 |
user_vector = pd.Series(index=user_item_matrix_columns, dtype=float).fillna(0)
|
110 |
for property_id, score in pivot_df['interaction_score'].items():
|
111 |
if property_id in user_vector.index:
|
112 |
user_vector[property_id] = score
|
113 |
-
|
114 |
# Transform the user vector using the SVD model
|
115 |
user_vector_array = user_vector.values.reshape(1, -1)
|
116 |
user_latent = svd.transform(user_vector_array)
|
117 |
-
|
118 |
# Calculate similarity scores between the user vector and item factors
|
119 |
similarity_scores = cosine_similarity(user_latent, item_factors)
|
120 |
-
|
121 |
# Get the indices of the top 10 most similar items
|
122 |
top_indices = similarity_scores.argsort()[0][-10:][::-1]
|
123 |
-
|
124 |
# Get the corresponding property IDs for the top indices
|
125 |
recommendations = user_item_matrix_columns[top_indices].tolist()
|
126 |
-
|
127 |
return recommendations
|
|
|
128 |
except Exception as e:
|
129 |
-
logger.error(f"Error in generate_recommendations_for_session: {e}")
|
|
|
130 |
return None
|
131 |
|
132 |
|
|
|
78 |
# Convert session data to a DataFrame
|
79 |
raw_df = pd.DataFrame(session_data)
|
80 |
|
81 |
+
# Debug: log the column names of raw_df
|
82 |
+
logger.debug(f"Columns in raw_df: {raw_df.columns.tolist()}")
|
|
|
|
|
|
|
83 |
|
84 |
+
# Check if required columns exist
|
85 |
+
required_columns = ['id', 'action']
|
86 |
+
missing_columns = [col for col in required_columns if col not in raw_df.columns]
|
87 |
+
if missing_columns:
|
88 |
+
logger.error(f"Missing required columns: {missing_columns}")
|
89 |
+
return None
|
90 |
+
|
91 |
+
# Determine aggregation based on presence of 'duration' column
|
92 |
+
if 'duration' in raw_df.columns:
|
93 |
+
aggregated_data = raw_df.groupby(['id', 'action']).agg(
|
94 |
+
presence=('action', 'size'),
|
95 |
+
total_duration=('duration', 'sum')
|
96 |
+
).reset_index()
|
97 |
+
else:
|
98 |
+
aggregated_data = raw_df.groupby(['id', 'action']).agg(
|
99 |
+
presence=('action', 'size')
|
100 |
+
).reset_index()
|
101 |
+
|
102 |
+
# Create pivot table
|
103 |
+
pivot_columns = ['presence', 'total_duration'] if 'duration' in raw_df.columns else ['presence']
|
104 |
pivot_df = aggregated_data.pivot_table(
|
105 |
index=['id'],
|
106 |
columns='action',
|
107 |
+
values=pivot_columns,
|
108 |
fill_value=0
|
109 |
)
|
110 |
+
|
111 |
# Flatten column names
|
112 |
pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
|
113 |
+
|
114 |
# Ensure all expected columns exist in the pivot table
|
115 |
for col in ALL_COLUMNS:
|
116 |
if f'presence_{col}' not in pivot_df.columns and col != 'time_spent':
|
117 |
pivot_df[f'presence_{col}'] = 0
|
118 |
+
elif col == 'time_spent' and 'duration' in raw_df.columns and 'total_duration_time_spent' not in pivot_df.columns:
|
119 |
pivot_df['total_duration_time_spent'] = 0
|
120 |
+
|
121 |
# Calculate interaction score for each row
|
122 |
pivot_df['interaction_score'] = pivot_df.apply(calculate_interaction_score, axis=1)
|
123 |
+
|
124 |
# Create a user vector based on the interaction scores
|
125 |
user_vector = pd.Series(index=user_item_matrix_columns, dtype=float).fillna(0)
|
126 |
for property_id, score in pivot_df['interaction_score'].items():
|
127 |
if property_id in user_vector.index:
|
128 |
user_vector[property_id] = score
|
129 |
+
|
130 |
# Transform the user vector using the SVD model
|
131 |
user_vector_array = user_vector.values.reshape(1, -1)
|
132 |
user_latent = svd.transform(user_vector_array)
|
133 |
+
|
134 |
# Calculate similarity scores between the user vector and item factors
|
135 |
similarity_scores = cosine_similarity(user_latent, item_factors)
|
136 |
+
|
137 |
# Get the indices of the top 10 most similar items
|
138 |
top_indices = similarity_scores.argsort()[0][-10:][::-1]
|
139 |
+
|
140 |
# Get the corresponding property IDs for the top indices
|
141 |
recommendations = user_item_matrix_columns[top_indices].tolist()
|
142 |
+
|
143 |
return recommendations
|
144 |
+
|
145 |
except Exception as e:
|
146 |
+
logger.error(f"Error in generate_recommendations_for_session: {str(e)}")
|
147 |
+
logger.debug(f"Raw dataframe info: {raw_df.info()}")
|
148 |
return None
|
149 |
|
150 |
|