euler314 committed on
Commit
6988d0c
·
verified ·
1 Parent(s): 9b21bc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -17,7 +17,6 @@ def generate_hypercube(n=4):
17
  return np.array(list(itertools.product([0, 1], repeat=n)), dtype=float)
18
 
19
  def generate_simplex(n=3):
20
- # n-simplex in n-D: standard basis vectors + origin
21
  eye = np.eye(n, dtype=float)
22
  origin = np.zeros((1, n), dtype=float)
23
  return np.vstack([eye, origin])
@@ -44,8 +43,7 @@ def parse_text_points(text: str) -> np.ndarray:
44
  txt = textwrap.dedent(text.strip())
45
  rows = [r for r in txt.splitlines() if r.strip()]
46
  data = [list(map(float, r.replace(",", " ").split())) for r in rows]
47
- arr = np.array(data, dtype=float)
48
- return arr
49
 
50
  def run_tsne(data, perp, seed):
51
  ts = TSNE(n_components=2, perplexity=perp, random_state=seed, init="pca")
@@ -95,13 +93,11 @@ with st.sidebar:
95
  algo = st.selectbox("Method", ["t-SNE","PCA","UMAP"])
96
  seed = st.number_input("Random seed", value=42, step=1)
97
 
98
- # method-specific
99
  if algo == "t-SNE":
100
  perp = st.slider("Perplexity", 5.0, 50.0, 30.0, 1.0)
101
  elif algo == "UMAP":
102
  neighbors = st.slider("n_neighbors", 5, 200, 15, 5)
103
  min_dist = st.slider("min_dist", 0.0, 0.99, 0.1, 0.01)
104
- # PCA has no extra params
105
 
106
  st.header("3️⃣ Clustering (optional)")
107
  do_cluster = st.checkbox("Cluster embedding")
@@ -130,10 +126,15 @@ if run:
130
  else:
131
  emb, kl = run_umap(pts, neighbors, min_dist, seed)
132
 
133
- # compute trustworthiness
134
- tw = trustworthiness(pts, emb, n_neighbors=5)
 
 
 
 
 
135
 
136
- # clustering
137
  df = pd.DataFrame(emb, columns=["x","y"])
138
  if do_cluster:
139
  if cluster_algo == "KMeans":
@@ -154,7 +155,11 @@ if run:
154
  st.subheader("2-D Embedding")
155
  st.plotly_chart(fig, use_container_width=True)
156
 
157
- st.markdown(f"**Trustworthiness (k=5):** {tw:.3f}")
 
 
 
 
158
  if kl is not None:
159
  st.markdown(f"**t-SNE KL divergence:** {kl:.3f}")
160
 
@@ -167,11 +172,9 @@ if run:
167
  mime="text/csv"
168
  )
169
 
170
- # raw data expander
171
  with st.expander("Show original data"):
172
  st.write(pts)
173
 
174
- # t-SNE math explainer
175
  if algo == "t-SNE":
176
  with st.expander("🧠 How t-SNE works"):
177
  st.markdown(r"""
 
17
  return np.array(list(itertools.product([0, 1], repeat=n)), dtype=float)
18
 
19
  def generate_simplex(n=3):
 
20
  eye = np.eye(n, dtype=float)
21
  origin = np.zeros((1, n), dtype=float)
22
  return np.vstack([eye, origin])
 
43
  txt = textwrap.dedent(text.strip())
44
  rows = [r for r in txt.splitlines() if r.strip()]
45
  data = [list(map(float, r.replace(",", " ").split())) for r in rows]
46
+ return np.array(data, dtype=float)
 
47
 
48
  def run_tsne(data, perp, seed):
49
  ts = TSNE(n_components=2, perplexity=perp, random_state=seed, init="pca")
 
93
  algo = st.selectbox("Method", ["t-SNE","PCA","UMAP"])
94
  seed = st.number_input("Random seed", value=42, step=1)
95
 
 
96
  if algo == "t-SNE":
97
  perp = st.slider("Perplexity", 5.0, 50.0, 30.0, 1.0)
98
  elif algo == "UMAP":
99
  neighbors = st.slider("n_neighbors", 5, 200, 15, 5)
100
  min_dist = st.slider("min_dist", 0.0, 0.99, 0.1, 0.01)
 
101
 
102
  st.header("3️⃣ Clustering (optional)")
103
  do_cluster = st.checkbox("Cluster embedding")
 
126
  else:
127
  emb, kl = run_umap(pts, neighbors, min_dist, seed)
128
 
129
+ # dynamic trustworthiness
130
+ n_samples = pts.shape[0]
131
+ k_max = (n_samples - 1) // 2
132
+ if k_max >= 1:
133
+ tw = trustworthiness(pts, emb, n_neighbors=k_max)
134
+ else:
135
+ tw = None
136
 
137
+ # clustering & plotting
138
  df = pd.DataFrame(emb, columns=["x","y"])
139
  if do_cluster:
140
  if cluster_algo == "KMeans":
 
155
  st.subheader("2-D Embedding")
156
  st.plotly_chart(fig, use_container_width=True)
157
 
158
+ if tw is not None:
159
+ st.markdown(f"**Trustworthiness (k={k_max}):** {tw:.3f}")
160
+ else:
161
+ st.markdown("**Trustworthiness:** Not enough samples to compute (need ≥3 points).")
162
+
163
  if kl is not None:
164
  st.markdown(f"**t-SNE KL divergence:** {kl:.3f}")
165
 
 
172
  mime="text/csv"
173
  )
174
 
 
175
  with st.expander("Show original data"):
176
  st.write(pts)
177
 
 
178
  if algo == "t-SNE":
179
  with st.expander("🧠 How t-SNE works"):
180
  st.markdown(r"""