Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files
- src/streamlit_app.py +7 -57
src/streamlit_app.py
CHANGED
@@ -153,48 +153,6 @@ def get_r_matrix_from_votes():
|
|
153 |
local_con.close()
|
154 |
|
155 |
|
156 |
-
# Custom Hamming-like distance function handling NaNs for clustering
|
157 |
-
# Assumes numpy is imported as np
|
158 |
-
def hamming_distance_with_nan(u1, u2):
|
159 |
-
"""
|
160 |
-
Calculates a Hamming-like distance between two vectors (user vote profiles)
|
161 |
-
ignoring positions where either value is NaN.
|
162 |
-
|
163 |
-
Args:
|
164 |
-
u1 (np.ndarray or pd.Series): First vector.
|
165 |
-
u2 (np.ndarray or pd.Series): Second vector.
|
166 |
-
|
167 |
-
Returns:
|
168 |
-
float: The proportion of differing elements among non-NaN positions.
|
169 |
-
Returns 0.0 if vectors are identical (including all NaN),
|
170 |
-
1.0 if different but no common non-NaN positions.
|
171 |
-
"""
|
172 |
-
u1 = np.asarray(u1)
|
173 |
-
u2 = np.asarray(u2)
|
174 |
-
|
175 |
-
# Find positions where both are not NaN
|
176 |
-
both_not_nan_mask = ~np.isnan(u1) & ~np.isnan(u2)
|
177 |
-
|
178 |
-
# If no common non-NaN values
|
179 |
-
if not np.any(both_not_nan_mask):
|
180 |
-
# If vectors are identical (e.g., both all NaN), distance is 0.
|
181 |
-
# If different vectors with no common non-NaN, distance is 1 (max difference).
|
182 |
-
if np.array_equal(u1, u2, equal_nan=True):
|
183 |
-
return 0.0
|
184 |
-
else:
|
185 |
-
return 1.0
|
186 |
-
|
187 |
-
# Filter to only positions where both are not NaN
|
188 |
-
u1_filtered = u1[both_not_nan_mask]
|
189 |
-
u2_filtered = u2[both_not_nan_mask]
|
190 |
-
|
191 |
-
# Calculate proportion of differing elements among common non-NaN positions
|
192 |
-
diff_count = np.sum(u1_filtered != u2_filtered)
|
193 |
-
total_count = len(u1_filtered)
|
194 |
-
|
195 |
-
return diff_count / total_count
|
196 |
-
|
197 |
-
|
198 |
# Function to get clusters using HDBSCAN with the custom Hamming distance
|
199 |
# Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
|
200 |
def get_clusters_from_r_matrix(r_matrix):
|
@@ -222,11 +180,10 @@ def get_clusters_from_r_matrix(r_matrix):
|
|
222 |
# These might need tuning based on data characteristics and desired cluster granularity
|
223 |
# allow_single_cluster=True prevents an error if all points form one cluster
|
224 |
clusterer = hdbscan.HDBSCAN(
|
225 |
-
metric=hamming_distance_with_nan,
|
226 |
allow_single_cluster=True,
|
227 |
min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
|
228 |
-
min_samples=None
|
229 |
-
)
|
230 |
|
231 |
# Fit the model directly to the DataFrame values
|
232 |
# HDBSCAN fit expects a numpy array or similar structure
|
@@ -267,7 +224,7 @@ def get_cluster_labels(user_id):
|
|
267 |
# Filter the r_matrix to include only these columns
|
268 |
# This is the matrix that will be used for clustering in the next step.
|
269 |
# The subsequent line calling get_clusters_from_r_matrix should use this variable.
|
270 |
-
|
271 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
272 |
if len(cluster_labels) == 0:
|
273 |
cluster_labels = [0] * len(user_id_to_index)
|
@@ -983,6 +940,7 @@ def view_topic_page():
|
|
983 |
st.markdown(random.choice(prompts))
|
984 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
985 |
if st.button("Share Your Wisdom"):
|
|
|
986 |
if new_comment_text and len(new_comment_text.strip()):
|
987 |
user_email = st.session_state.get('user_email', '')
|
988 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
@@ -999,17 +957,7 @@ def view_topic_page():
|
|
999 |
# Append new comment to history
|
1000 |
st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
|
1001 |
|
1002 |
-
# Get next comment (could be the one just submitted)
|
1003 |
-
next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
|
1004 |
-
st.session_state.current_comment_id = next_comment_id
|
1005 |
-
st.session_state.current_comment_content = next_comment_content
|
1006 |
-
|
1007 |
-
# Update progress
|
1008 |
-
update_user_progress(user_id, topic_id, next_comment_id)
|
1009 |
-
|
1010 |
st.session_state.tmp_new_comment_input = "" # Clear input box
|
1011 |
-
st.rerun() # Rerun to update UI
|
1012 |
-
|
1013 |
except Exception as e:
|
1014 |
st.error(f"Error sharing information: {e}")
|
1015 |
finally:
|
@@ -1017,6 +965,7 @@ def view_topic_page():
|
|
1017 |
local_con.close()
|
1018 |
else:
|
1019 |
st.error("Could not find or create user.")
|
|
|
1020 |
|
1021 |
# Get next comment
|
1022 |
# This should always get the next unvoted comment for the user in this topic.
|
@@ -1029,7 +978,8 @@ def view_topic_page():
|
|
1029 |
update_user_progress(user_id, topic_id, next_comment_id)
|
1030 |
|
1031 |
st.session_state._voting_in_progress = False
|
1032 |
-
st.rerun()
|
|
|
1033 |
|
1034 |
except Exception as e:
|
1035 |
st.error(f"Error processing vote: {e}")
|
|
|
153 |
local_con.close()
|
154 |
|
155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
# Function to get clusters using HDBSCAN with the custom Hamming distance
|
157 |
# Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
|
158 |
def get_clusters_from_r_matrix(r_matrix):
|
|
|
180 |
# These might need tuning based on data characteristics and desired cluster granularity
|
181 |
# allow_single_cluster=True prevents an error if all points form one cluster
|
182 |
clusterer = hdbscan.HDBSCAN(
|
183 |
+
metric='hamming',
|
184 |
allow_single_cluster=True,
|
185 |
min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
|
186 |
+
min_samples=None)
|
|
|
187 |
|
188 |
# Fit the model directly to the DataFrame values
|
189 |
# HDBSCAN fit expects a numpy array or similar structure
|
|
|
224 |
# Filter the r_matrix to include only these columns
|
225 |
# This is the matrix that will be used for clustering in the next step.
|
226 |
# The subsequent line calling get_clusters_from_r_matrix should use this variable.
|
227 |
+
r_matrix = r_matrix[voted_comment_ids]
|
228 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
229 |
if len(cluster_labels) == 0:
|
230 |
cluster_labels = [0] * len(user_id_to_index)
|
|
|
940 |
st.markdown(random.choice(prompts))
|
941 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
942 |
if st.button("Share Your Wisdom"):
|
943 |
+
st.session_state.handling_vote = True # lock
|
944 |
if new_comment_text and len(new_comment_text.strip()):
|
945 |
user_email = st.session_state.get('user_email', '')
|
946 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
|
|
957 |
# Append new comment to history
|
958 |
st.session_state.comment_history += f"\n\n💬 {new_comment_text}"
|
959 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
960 |
st.session_state.tmp_new_comment_input = "" # Clear input box
|
|
|
|
|
961 |
except Exception as e:
|
962 |
st.error(f"Error sharing information: {e}")
|
963 |
finally:
|
|
|
965 |
local_con.close()
|
966 |
else:
|
967 |
st.error("Could not find or create user.")
|
968 |
+
st.session_state.handling_vote = False # lock
|
969 |
|
970 |
# Get next comment
|
971 |
# This should always get the next unvoted comment for the user in this topic.
|
|
|
978 |
update_user_progress(user_id, topic_id, next_comment_id)
|
979 |
|
980 |
st.session_state._voting_in_progress = False
|
981 |
+
if st.session_state.get("handling_vote", False) is False:
|
982 |
+
st.rerun() # Rerun to update UI
|
983 |
|
984 |
except Exception as e:
|
985 |
st.error(f"Error processing vote: {e}")
|