Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,6 @@ def generate_hypercube(n=4):
|
|
17 |
return np.array(list(itertools.product([0, 1], repeat=n)), dtype=float)
|
18 |
|
19 |
def generate_simplex(n=3):
|
20 |
-
# n-simplex in n-D: standard basis vectors + origin
|
21 |
eye = np.eye(n, dtype=float)
|
22 |
origin = np.zeros((1, n), dtype=float)
|
23 |
return np.vstack([eye, origin])
|
@@ -44,8 +43,7 @@ def parse_text_points(text: str) -> np.ndarray:
|
|
44 |
txt = textwrap.dedent(text.strip())
|
45 |
rows = [r for r in txt.splitlines() if r.strip()]
|
46 |
data = [list(map(float, r.replace(",", " ").split())) for r in rows]
|
47 |
-
|
48 |
-
return arr
|
49 |
|
50 |
def run_tsne(data, perp, seed):
|
51 |
ts = TSNE(n_components=2, perplexity=perp, random_state=seed, init="pca")
|
@@ -95,13 +93,11 @@ with st.sidebar:
|
|
95 |
algo = st.selectbox("Method", ["t-SNE","PCA","UMAP"])
|
96 |
seed = st.number_input("Random seed", value=42, step=1)
|
97 |
|
98 |
-
# method-specific
|
99 |
if algo == "t-SNE":
|
100 |
perp = st.slider("Perplexity", 5.0, 50.0, 30.0, 1.0)
|
101 |
elif algo == "UMAP":
|
102 |
neighbors = st.slider("n_neighbors", 5, 200, 15, 5)
|
103 |
min_dist = st.slider("min_dist", 0.0, 0.99, 0.1, 0.01)
|
104 |
-
# PCA has no extra params
|
105 |
|
106 |
st.header("3️⃣ Clustering (optional)")
|
107 |
do_cluster = st.checkbox("Cluster embedding")
|
@@ -130,10 +126,15 @@ if run:
|
|
130 |
else:
|
131 |
emb, kl = run_umap(pts, neighbors, min_dist, seed)
|
132 |
|
133 |
-
#
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
-
# clustering
|
137 |
df = pd.DataFrame(emb, columns=["x","y"])
|
138 |
if do_cluster:
|
139 |
if cluster_algo == "KMeans":
|
@@ -154,7 +155,11 @@ if run:
|
|
154 |
st.subheader("2-D Embedding")
|
155 |
st.plotly_chart(fig, use_container_width=True)
|
156 |
|
157 |
-
|
|
|
|
|
|
|
|
|
158 |
if kl is not None:
|
159 |
st.markdown(f"**t-SNE KL divergence:** {kl:.3f}")
|
160 |
|
@@ -167,11 +172,9 @@ if run:
|
|
167 |
mime="text/csv"
|
168 |
)
|
169 |
|
170 |
-
# raw data expander
|
171 |
with st.expander("Show original data"):
|
172 |
st.write(pts)
|
173 |
|
174 |
-
# t-SNE math explainer
|
175 |
if algo == "t-SNE":
|
176 |
with st.expander("🧠 How t-SNE works"):
|
177 |
st.markdown(r"""
|
|
|
17 |
return np.array(list(itertools.product([0, 1], repeat=n)), dtype=float)
|
18 |
|
19 |
def generate_simplex(n=3):
|
|
|
20 |
eye = np.eye(n, dtype=float)
|
21 |
origin = np.zeros((1, n), dtype=float)
|
22 |
return np.vstack([eye, origin])
|
|
|
43 |
txt = textwrap.dedent(text.strip())
|
44 |
rows = [r for r in txt.splitlines() if r.strip()]
|
45 |
data = [list(map(float, r.replace(",", " ").split())) for r in rows]
|
46 |
+
return np.array(data, dtype=float)
|
|
|
47 |
|
48 |
def run_tsne(data, perp, seed):
|
49 |
ts = TSNE(n_components=2, perplexity=perp, random_state=seed, init="pca")
|
|
|
93 |
algo = st.selectbox("Method", ["t-SNE","PCA","UMAP"])
|
94 |
seed = st.number_input("Random seed", value=42, step=1)
|
95 |
|
|
|
96 |
if algo == "t-SNE":
|
97 |
perp = st.slider("Perplexity", 5.0, 50.0, 30.0, 1.0)
|
98 |
elif algo == "UMAP":
|
99 |
neighbors = st.slider("n_neighbors", 5, 200, 15, 5)
|
100 |
min_dist = st.slider("min_dist", 0.0, 0.99, 0.1, 0.01)
|
|
|
101 |
|
102 |
st.header("3️⃣ Clustering (optional)")
|
103 |
do_cluster = st.checkbox("Cluster embedding")
|
|
|
126 |
else:
|
127 |
emb, kl = run_umap(pts, neighbors, min_dist, seed)
|
128 |
|
129 |
+
# dynamic trustworthiness
|
130 |
+
n_samples = pts.shape[0]
|
131 |
+
k_max = (n_samples - 1) // 2
|
132 |
+
if k_max >= 1:
|
133 |
+
tw = trustworthiness(pts, emb, n_neighbors=k_max)
|
134 |
+
else:
|
135 |
+
tw = None
|
136 |
|
137 |
+
# clustering & plotting
|
138 |
df = pd.DataFrame(emb, columns=["x","y"])
|
139 |
if do_cluster:
|
140 |
if cluster_algo == "KMeans":
|
|
|
155 |
st.subheader("2-D Embedding")
|
156 |
st.plotly_chart(fig, use_container_width=True)
|
157 |
|
158 |
+
if tw is not None:
|
159 |
+
st.markdown(f"**Trustworthiness (k={k_max}):** {tw:.3f}")
|
160 |
+
else:
|
161 |
+
st.markdown("**Trustworthiness:** Not enough samples to compute (need ≥3 points).")
|
162 |
+
|
163 |
if kl is not None:
|
164 |
st.markdown(f"**t-SNE KL divergence:** {kl:.3f}")
|
165 |
|
|
|
172 |
mime="text/csv"
|
173 |
)
|
174 |
|
|
|
175 |
with st.expander("Show original data"):
|
176 |
st.write(pts)
|
177 |
|
|
|
178 |
if algo == "t-SNE":
|
179 |
with st.expander("🧠 How t-SNE works"):
|
180 |
st.markdown(r"""
|