bardd committed
Commit f27556c · verified · 1 Parent(s): 0f94c51

Update main.py

Files changed (1)
  1. main.py +36 -18
main.py CHANGED
@@ -78,55 +78,73 @@ def generate_recommendations_for_session(session_id):
         # Convert session data to a DataFrame
         raw_df = pd.DataFrame(session_data)

-        # Aggregate data by id and action
-        aggregated_data = raw_df.groupby(['id', 'action']).agg(
-            presence=('action', 'size'),
-            total_duration=('duration', 'sum')
-        ).reset_index()
+        # Debug: Print column names
+        logger.debug(f"Columns in raw_df: {raw_df.columns.tolist()}")

-        # Create a pivot table from the aggregated data
+        # Check if required columns exist
+        required_columns = ['id', 'action']
+        missing_columns = [col for col in required_columns if col not in raw_df.columns]
+        if missing_columns:
+            logger.error(f"Missing required columns: {missing_columns}")
+            return None
+
+        # Determine aggregation based on presence of 'duration' column
+        if 'duration' in raw_df.columns:
+            aggregated_data = raw_df.groupby(['id', 'action']).agg(
+                presence=('action', 'size'),
+                total_duration=('duration', 'sum')
+            ).reset_index()
+        else:
+            aggregated_data = raw_df.groupby(['id', 'action']).agg(
+                presence=('action', 'size')
+            ).reset_index()
+
+        # Create pivot table
+        pivot_columns = ['presence', 'total_duration'] if 'duration' in raw_df.columns else ['presence']
         pivot_df = aggregated_data.pivot_table(
             index=['id'],
             columns='action',
-            values=['presence', 'total_duration'],
+            values=pivot_columns,
             fill_value=0
         )
-
+
         # Flatten column names
         pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
-
+
         # Ensure all expected columns exist in the pivot table
         for col in ALL_COLUMNS:
             if f'presence_{col}' not in pivot_df.columns and col != 'time_spent':
                 pivot_df[f'presence_{col}'] = 0
-            elif col == 'time_spent' and 'total_duration_time_spent' not in pivot_df.columns:
+            elif col == 'time_spent' and 'duration' in raw_df.columns and 'total_duration_time_spent' not in pivot_df.columns:
                 pivot_df['total_duration_time_spent'] = 0
-
+
         # Calculate interaction score for each row
         pivot_df['interaction_score'] = pivot_df.apply(calculate_interaction_score, axis=1)
-
+
         # Create a user vector based on the interaction scores
         user_vector = pd.Series(index=user_item_matrix_columns, dtype=float).fillna(0)
         for property_id, score in pivot_df['interaction_score'].items():
             if property_id in user_vector.index:
                 user_vector[property_id] = score
-
+
         # Transform the user vector using the SVD model
         user_vector_array = user_vector.values.reshape(1, -1)
         user_latent = svd.transform(user_vector_array)
-
+
         # Calculate similarity scores between the user vector and item factors
         similarity_scores = cosine_similarity(user_latent, item_factors)
-
+
         # Get the indices of the top 10 most similar items
         top_indices = similarity_scores.argsort()[0][-10:][::-1]
-
+
         # Get the corresponding property IDs for the top indices
         recommendations = user_item_matrix_columns[top_indices].tolist()
-
+
         return recommendations
+
     except Exception as e:
-        logger.error(f"Error in generate_recommendations_for_session: {e}")
+        logger.error(f"Error in generate_recommendations_for_session: {str(e)}")
+        logger.debug(f"Raw dataframe info: {raw_df.info()}")
         return None

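For reference, a minimal standalone sketch of the guarded aggregation-and-pivot path this commit introduces: the same groupby/pivot_table calls as in main.py, run on toy session payloads with and without a 'duration' column. The aggregate_session helper and the sample events are hypothetical, for illustration only; only pandas is assumed.

import pandas as pd

def aggregate_session(session_data):
    # Hypothetical helper mirroring the aggregation/pivot portion of the diff
    raw_df = pd.DataFrame(session_data)

    # Guard against payloads missing the key columns (as the commit does)
    required_columns = ['id', 'action']
    if [c for c in required_columns if c not in raw_df.columns]:
        return None

    # Sum durations only when the 'duration' column is actually present
    if 'duration' in raw_df.columns:
        aggregated = raw_df.groupby(['id', 'action']).agg(
            presence=('action', 'size'),
            total_duration=('duration', 'sum')
        ).reset_index()
        values = ['presence', 'total_duration']
    else:
        aggregated = raw_df.groupby(['id', 'action']).agg(
            presence=('action', 'size')
        ).reset_index()
        values = ['presence']

    pivot = aggregated.pivot_table(index=['id'], columns='action',
                                   values=values, fill_value=0)
    pivot.columns = ['_'.join(col).strip() for col in pivot.columns.values]
    return pivot

# Events that include durations -> presence_* and total_duration_* columns
with_duration = [
    {'id': 'p1', 'action': 'view', 'duration': 30},
    {'id': 'p1', 'action': 'time_spent', 'duration': 120},
]
print(aggregate_session(with_duration).columns.tolist())

# Events without durations -> only presence_* columns, and no KeyError
# from the old unconditional total_duration=('duration', 'sum') aggregation
without_duration = [{'id': 'p2', 'action': 'view'}]
print(aggregate_session(without_duration).columns.tolist())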
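The downstream recommendation step (unchanged by this commit) projects the session's user vector into the latent space and ranks items by cosine similarity. A rough sketch under assumptions: svd is taken to be a scikit-learn TruncatedSVD fitted on the user-item matrix, item_factors its per-item latent vectors (svd.components_.T), and the toy matrix and property IDs below are made up.

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical user-item matrix: 50 users x 20 properties with small counts
rng = np.random.default_rng(0)
user_item_matrix_columns = pd.Index([f"prop_{i}" for i in range(20)])
user_item_matrix = rng.integers(0, 5, size=(50, 20)).astype(float)

# Assumed setup: TruncatedSVD latent space, one row per item in item_factors
svd = TruncatedSVD(n_components=8, random_state=0)
svd.fit(user_item_matrix)
item_factors = svd.components_.T

# Sparse user vector built from interaction scores, as in main.py
user_vector = pd.Series(0.0, index=user_item_matrix_columns)
user_vector['prop_3'] = 2.5
user_vector['prop_7'] = 1.0

# Project into latent space and rank all items by cosine similarity
user_latent = svd.transform(user_vector.values.reshape(1, -1))
similarity_scores = cosine_similarity(user_latent, item_factors)
top_indices = similarity_scores.argsort()[0][-10:][::-1]
recommendations = user_item_matrix_columns[top_indices].tolist()
print(recommendations)  # ten property IDs, most similar first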