npc0 committed on
Commit
21c7fed
·
verified ·
1 Parent(s): 9c00a0d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +7 -57
src/streamlit_app.py CHANGED
@@ -153,48 +153,6 @@ def get_r_matrix_from_votes():
153
  local_con.close()
154
 
155
 
156
- # Custom Hamming-like distance function handling NaNs for clustering
157
- # Assumes numpy is imported as np
158
- def hamming_distance_with_nan(u1, u2):
159
- """
160
- Calculates a Hamming-like distance between two vectors (user vote profiles)
161
- ignoring positions where either value is NaN.
162
-
163
- Args:
164
- u1 (np.ndarray or pd.Series): First vector.
165
- u2 (np.ndarray or pd.Series): Second vector.
166
-
167
- Returns:
168
- float: The proportion of differing elements among non-NaN positions.
169
- Returns 0.0 if vectors are identical (including all NaN),
170
- 1.0 if different but no common non-NaN positions.
171
- """
172
- u1 = np.asarray(u1)
173
- u2 = np.asarray(u2)
174
-
175
- # Find positions where both are not NaN
176
- both_not_nan_mask = ~np.isnan(u1) & ~np.isnan(u2)
177
-
178
- # If no common non-NaN values
179
- if not np.any(both_not_nan_mask):
180
- # If vectors are identical (e.g., both all NaN), distance is 0.
181
- # If different vectors with no common non-NaN, distance is 1 (max difference).
182
- if np.array_equal(u1, u2, equal_nan=True):
183
- return 0.0
184
- else:
185
- return 1.0
186
-
187
- # Filter to only positions where both are not NaN
188
- u1_filtered = u1[both_not_nan_mask]
189
- u2_filtered = u2[both_not_nan_mask]
190
-
191
- # Calculate proportion of differing elements among common non-NaN positions
192
- diff_count = np.sum(u1_filtered != u2_filtered)
193
- total_count = len(u1_filtered)
194
-
195
- return diff_count / total_count
196
-
197
-
198
  # Function to get clusters using HDBSCAN with the custom Hamming distance
199
  # Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
200
  def get_clusters_from_r_matrix(r_matrix):
@@ -222,11 +180,10 @@ def get_clusters_from_r_matrix(r_matrix):
222
  # These might need tuning based on data characteristics and desired cluster granularity
223
  # allow_single_cluster=True prevents an error if all points form one cluster
224
  clusterer = hdbscan.HDBSCAN(
225
- metric=hamming_distance_with_nan,
226
  allow_single_cluster=True,
227
  min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
228
- min_samples=None,
229
- )
230
 
231
  # Fit the model directly to the DataFrame values
232
  # HDBSCAN fit expects a numpy array or similar structure
@@ -267,7 +224,7 @@ def get_cluster_labels(user_id):
267
  # Filter the r_matrix to include only these columns
268
  # This is the matrix that will be used for clustering in the next step.
269
  # The subsequent line calling get_clusters_from_r_matrix should use this variable.
270
- r_matrix_to_cluster = r_matrix[voted_comment_ids]
271
  cluster_labels = get_clusters_from_r_matrix(r_matrix)
272
  if len(cluster_labels) == 0:
273
  cluster_labels = [0] * len(user_id_to_index)
@@ -983,6 +940,7 @@ def view_topic_page():
983
  st.markdown(random.choice(prompts))
984
  new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
985
  if st.button("Share Your Wisdom"):
 
986
  if new_comment_text and len(new_comment_text.strip()):
987
  user_email = st.session_state.get('user_email', '')
988
  user_id = find_or_create_user(user_email) # Ensure user exists
@@ -999,17 +957,7 @@ def view_topic_page():
999
  # Append new comment to history
1000
  st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
1001
 
1002
- # Get next comment (could be the one just submitted)
1003
- next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
1004
- st.session_state.current_comment_id = next_comment_id
1005
- st.session_state.current_comment_content = next_comment_content
1006
-
1007
- # Update progress
1008
- update_user_progress(user_id, topic_id, next_comment_id)
1009
-
1010
  st.session_state.tmp_new_comment_input = "" # Clear input box
1011
- st.rerun() # Rerun to update UI
1012
-
1013
  except Exception as e:
1014
  st.error(f"Error sharing information: {e}")
1015
  finally:
@@ -1017,6 +965,7 @@ def view_topic_page():
1017
  local_con.close()
1018
  else:
1019
  st.error("Could not find or create user.")
 
1020
 
1021
  # Get next comment
1022
  # This should always get the next unvoted comment for the user in this topic.
@@ -1029,7 +978,8 @@ def view_topic_page():
1029
  update_user_progress(user_id, topic_id, next_comment_id)
1030
 
1031
  st.session_state._voting_in_progress = False
1032
- st.rerun() # Rerun to update UI
 
1033
 
1034
  except Exception as e:
1035
  st.error(f"Error processing vote: {e}")
 
153
  local_con.close()
154
 
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  # Function to get clusters using HDBSCAN with the built-in Hamming metric
157
  # Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
158
  def get_clusters_from_r_matrix(r_matrix):
 
180
  # These might need tuning based on data characteristics and desired cluster granularity
181
  # allow_single_cluster=True prevents an error if all points form one cluster
182
  clusterer = hdbscan.HDBSCAN(
183
+ metric='hamming',
184
  allow_single_cluster=True,
185
  min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
186
+ min_samples=None)
 
187
 
188
  # Fit the model directly to the DataFrame values
189
  # HDBSCAN fit expects a numpy array or similar structure
 
224
  # Filter the r_matrix to include only these columns
225
  # This is the matrix that will be used for clustering in the next step.
226
  # The subsequent line calling get_clusters_from_r_matrix should use this variable.
227
+ r_matrix = r_matrix[voted_comment_ids]
228
  cluster_labels = get_clusters_from_r_matrix(r_matrix)
229
  if len(cluster_labels) == 0:
230
  cluster_labels = [0] * len(user_id_to_index)
 
940
  st.markdown(random.choice(prompts))
941
  new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
942
  if st.button("Share Your Wisdom"):
943
+ st.session_state.handling_vote = True # lock
944
  if new_comment_text and len(new_comment_text.strip()):
945
  user_email = st.session_state.get('user_email', '')
946
  user_id = find_or_create_user(user_email) # Ensure user exists
 
957
  # Append new comment to history
958
  st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
959
 
 
 
 
 
 
 
 
 
960
  st.session_state.tmp_new_comment_input = "" # Clear input box
 
 
961
  except Exception as e:
962
  st.error(f"Error sharing information: {e}")
963
  finally:
 
965
  local_con.close()
966
  else:
967
  st.error("Could not find or create user.")
968
+ st.session_state.handling_vote = False # lock
969
 
970
  # Get next comment
971
  # This should always get the next unvoted comment for the user in this topic.
 
978
  update_user_progress(user_id, topic_id, next_comment_id)
979
 
980
  st.session_state._voting_in_progress = False
981
+ if st.session_state.get("handling_vote", False) is False:
982
+ st.rerun() # Rerun to update UI
983
 
984
  except Exception as e:
985
  st.error(f"Error processing vote: {e}")