Spaces:

npc0
/

SteamPolis

Sleeping

App Files Files

npc0 committed on May 1

Commit

21c7fed

verified ·

1 Parent(s): 9c00a0d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +7 -57

src/streamlit_app.py CHANGED Viewed

@@ -153,48 +153,6 @@ def get_r_matrix_from_votes():
             local_con.close()
-# Custom Hamming-like distance function handling NaNs for clustering
-# Assumes numpy is imported as np
-def hamming_distance_with_nan(u1, u2):
-    """
-    Calculates a Hamming-like distance between two vectors (user vote profiles)
-    ignoring positions where either value is NaN.
-    Args:
-        u1 (np.ndarray or pd.Series): First vector.
-        u2 (np.ndarray or pd.Series): Second vector.
-    Returns:
-        float: The proportion of differing elements among non-NaN positions.
-               Returns 0.0 if vectors are identical (including all NaN),
-               1.0 if different but no common non-NaN positions.
-    """
-    u1 = np.asarray(u1)
-    u2 = np.asarray(u2)
-    # Find positions where both are not NaN
-    both_not_nan_mask = ~np.isnan(u1) & ~np.isnan(u2)
-    # If no common non-NaN values
-    if not np.any(both_not_nan_mask):
-         # If vectors are identical (e.g., both all NaN), distance is 0.
-         # If different vectors with no common non-NaN, distance is 1 (max difference).
-         if np.array_equal(u1, u2, equal_nan=True):
-              return 0.0
-         else:
-              return 1.0
-    # Filter to only positions where both are not NaN
-    u1_filtered = u1[both_not_nan_mask]
-    u2_filtered = u2[both_not_nan_mask]
-    # Calculate proportion of differing elements among common non-NaN positions
-    diff_count = np.sum(u1_filtered != u2_filtered)
-    total_count = len(u1_filtered)
-    return diff_count / total_count
 # Function to get clusters using HDBSCAN with the custom Hamming distance
 # Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
 def get_clusters_from_r_matrix(r_matrix):
@@ -222,11 +180,10 @@ def get_clusters_from_r_matrix(r_matrix):
         # These might need tuning based on data characteristics and desired cluster granularity
         # allow_single_cluster=True prevents an error if all points form one cluster
         clusterer = hdbscan.HDBSCAN(
-            metric=hamming_distance_with_nan,
             allow_single_cluster=True,
             min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
-            min_samples=None,
-        )
         # Fit the model directly to the DataFrame values
         # HDBSCAN fit expects a numpy array or similar structure
@@ -267,7 +224,7 @@ def get_cluster_labels(user_id):
     # Filter the r_matrix to include only these columns
     # This is the matrix that will be used for clustering in the next step.
     # The subsequent line calling get_clusters_from_r_matrix should use this variable.
-    r_matrix_to_cluster = r_matrix[voted_comment_ids]
     cluster_labels = get_clusters_from_r_matrix(r_matrix)
     if len(cluster_labels) == 0:
         cluster_labels = [0] * len(user_id_to_index)
@@ -983,6 +940,7 @@ def view_topic_page():
                         st.markdown(random.choice(prompts))
                         new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
                         if st.button("Share Your Wisdom"):
                             if new_comment_text and len(new_comment_text.strip()):
                                 user_email = st.session_state.get('user_email', '')
                                 user_id = find_or_create_user(user_email) # Ensure user exists
@@ -999,17 +957,7 @@ def view_topic_page():
                                         # Append new comment to history
                                         st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
-                                        # Get next comment (could be the one just submitted)
-                                        next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
-                                        st.session_state.current_comment_id = next_comment_id
-                                        st.session_state.current_comment_content = next_comment_content
-                                        # Update progress
-                                        update_user_progress(user_id, topic_id, next_comment_id)
                                         st.session_state.tmp_new_comment_input = "" # Clear input box
-                                        st.rerun() # Rerun to update UI
                                     except Exception as e:
                                         st.error(f"Error sharing information: {e}")
                                     finally:
@@ -1017,6 +965,7 @@ def view_topic_page():
                                             local_con.close()
                                 else:
                                     st.error("Could not find or create user.")
                 # Get next comment
                 # This should always get the next unvoted comment for the user in this topic.
@@ -1029,7 +978,8 @@ def view_topic_page():
                 update_user_progress(user_id, topic_id, next_comment_id)
                 st.session_state._voting_in_progress = False
-                st.rerun() # Rerun to update UI
             except Exception as e:
                 st.error(f"Error processing vote: {e}")

             local_con.close()
 # Function to get clusters using HDBSCAN with the built-in 'hamming' metric
 # Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
 def get_clusters_from_r_matrix(r_matrix):
         # These might need tuning based on data characteristics and desired cluster granularity
         # allow_single_cluster=True prevents an error if all points form one cluster
         clusterer = hdbscan.HDBSCAN(
+            metric='hamming',
             allow_single_cluster=True,
             min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
+            min_samples=None)
         # Fit the model directly to the DataFrame values
         # HDBSCAN fit expects a numpy array or similar structure
     # Filter the r_matrix to include only these columns
     # This is the matrix that will be used for clustering in the next step.
     # The subsequent line calling get_clusters_from_r_matrix should use this variable.
+    r_matrix = r_matrix[voted_comment_ids]
     cluster_labels = get_clusters_from_r_matrix(r_matrix)
     if len(cluster_labels) == 0:
         cluster_labels = [0] * len(user_id_to_index)
                         st.markdown(random.choice(prompts))
                         new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
                         if st.button("Share Your Wisdom"):
+                            st.session_state.handling_vote = True # lock
                             if new_comment_text and len(new_comment_text.strip()):
                                 user_email = st.session_state.get('user_email', '')
                                 user_id = find_or_create_user(user_email) # Ensure user exists
                                         # Append new comment to history
                                         st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
                                         st.session_state.tmp_new_comment_input = "" # Clear input box
                                     except Exception as e:
                                         st.error(f"Error sharing information: {e}")
                                     finally:
                                             local_con.close()
                                 else:
                                     st.error("Could not find or create user.")
+                            st.session_state.handling_vote = False # lock
                 # Get next comment
                 # This should always get the next unvoted comment for the user in this topic.
                 update_user_progress(user_id, topic_id, next_comment_id)
                 st.session_state._voting_in_progress = False
+                if st.session_state.get("handling_vote", False) is False:
+                    st.rerun() # Rerun to update UI
             except Exception as e:
                 st.error(f"Error processing vote: {e}")