|
import io |
|
import textwrap |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import streamlit as st |
|
from sklearn.manifold import TSNE |
|
import plotly.express as px |
|
|
|
|
|
# Built-in point sets for the "Example shape" data source, keyed by the label
# shown in the shape picker. Each value is an (n_points, 3) array holding one
# vertex per row.
EXAMPLE_SHAPES = {
    # The eight corners of the unit cube.
    "Cube (3-D, 8 vertices)": np.array([
        [0, 0, 0], [0, 0, 1],
        [0, 1, 0], [0, 1, 1],
        [1, 0, 0], [1, 0, 1],
        [1, 1, 0], [1, 1, 1],
    ]),
    # Four base corners in the z = 0 plane followed by the apex at z = 1.
    "Square pyramid (3-D, 5 vertices)": np.array([
        [-1, -1, 0],
        [1, -1, 0],
        [1, 1, 0],
        [-1, 1, 0],
        [0, 0, 1],
    ]),
}
|
|
|
|
|
def parse_text_points(text: str) -> np.ndarray:
    """Parse a multiline string of numbers into an (n_points, n_dims) array.

    Each non-blank line is one point; its coordinates may be separated by
    commas, whitespace, or a mix of both. Blank lines are ignored.

    Args:
        text: Raw text with one point per line.

    Returns:
        A 2-D float array with one row per point. Input that contains no
        points yields an empty array of shape (0, 0), so the result is
        always 2-D.

    Raises:
        ValueError: If a value cannot be parsed as a number, or if the lines
            do not all contain the same number of values. The message names
            the offending line (1-based, counted in ``text`` as given).
    """
    parsed = []  # (line number, coordinates) for every non-blank line
    for line_no, line in enumerate(text.splitlines(), start=1):
        tokens = line.replace(",", " ").split()
        if not tokens:
            continue
        try:
            parsed.append((line_no, [float(token) for token in tokens]))
        except ValueError as err:
            raise ValueError(
                f"line {line_no}: could not parse {line.strip()!r} as numbers"
            ) from err

    if not parsed:
        return np.empty((0, 0), dtype=float)

    # Reject ragged input up front so the caller gets a message pointing at
    # the bad line rather than NumPy's generic inhomogeneous-shape error.
    expected = len(parsed[0][1])
    for line_no, values in parsed:
        if len(values) != expected:
            raise ValueError(
                f"line {line_no}: expected {expected} values "
                f"but found {len(values)}"
            )

    return np.array([values for _, values in parsed], dtype=float)
|
|
|
|
|
def run_tsne(data: np.ndarray, perplexity: float, seed: int) -> np.ndarray:
    """Embed ``data`` into two dimensions with t-SNE.

    Args:
        data: Points to embed, one row per point.
        perplexity: t-SNE perplexity, passed through to ``TSNE``.
        seed: Random seed, passed through as ``random_state``.

    Returns:
        The result of ``TSNE.fit_transform(data)``: the 2-D embedding with
        one row per input point.
    """
    n_components = 2
    # PCA initialisation cannot produce more components than the data has
    # columns, so single-column input falls back to random initialisation
    # instead of failing inside scikit-learn.
    can_use_pca = data.ndim == 2 and data.shape[1] >= n_components
    tsne = TSNE(
        n_components=n_components,
        perplexity=perplexity,
        random_state=seed,
        init="pca" if can_use_pca else "random",
    )
    return tsne.fit_transform(data)
|
|
|
|
|
|
|
# Page header: the app title followed by a short introduction.
st.title("t-SNE Explorer for n-D Point Clouds")
st.markdown(
    """
    Upload or paste your points, choose parameters, and see how
    **t-SNE** flattens them into 2-D.
    *Example shapes* are provided for quick experimentation.
    """
)
|
|
|
|
|
# Sidebar: pick where the points come from, then set the t-SNE parameters.
# Every data-source branch either binds `data_raw` or halts this script run
# with st.stop(), so code after the sidebar can rely on `data_raw` existing.
with st.sidebar:
    st.header("1️⃣ Choose data source")
    source = st.radio(
        "Data input method",
        ["Example shape", "Upload CSV/TXT", "Paste raw text"],
    )

    if source == "Example shape":
        shape_key = st.selectbox("Pick a shape", list(EXAMPLE_SHAPES.keys()))
        data_raw = EXAMPLE_SHAPES[shape_key]

    elif source == "Upload CSV/TXT":
        file = st.file_uploader("Upload coordinates file (*.csv / *.txt)")
        if not file:
            # Nothing uploaded yet: wait for the user.
            st.stop()
        try:
            # "utf-8-sig" decodes plain UTF-8 and also drops a leading
            # byte-order mark, which float() would otherwise choke on.
            text = file.getvalue().decode("utf-8-sig")
            data_raw = parse_text_points(text)
        except ValueError as err:  # UnicodeDecodeError is a ValueError too
            st.error(f"Could not read points from the uploaded file: {err}")
            st.stop()

    else:
        placeholder = "e.g.\n0,0,0\n0,0,1\n0,1,0\n..."
        text = st.text_area(
            "Paste coordinates (one point per line)",
            height=200,
            placeholder=placeholder,
        )
        if not text.strip():
            # Nothing pasted yet: wait for the user.
            st.stop()
        try:
            data_raw = parse_text_points(text)
        except ValueError as err:
            st.error(f"Could not parse the pasted points: {err}")
            st.stop()

    st.divider()
    st.header("2️⃣ t-SNE parameters")
    # The minimum is 1 so that small inputs (such as the 5-vertex example)
    # can still satisfy the "perplexity < number of points" check made
    # before t-SNE runs.
    perplexity = st.slider("Perplexity", 1.0, 50.0, 30.0, 1.0)
    # scikit-learn only accepts integer seeds in [0, 2**32 - 1].
    seed = st.number_input(
        "Random seed",
        min_value=0,
        max_value=2**32 - 1,
        value=42,
        step=1,
    )
    run_button = st.button("Run t-SNE")
|
|
|
|
|
# Main panel: validate the input, run t-SNE, and show the embedding.
# Runs only on the script pass triggered by pressing the run button.
if run_button:
    if data_raw.ndim != 2 or data_raw.shape[0] < 2:
        st.error("Need at least two points; check your input.")
        st.stop()

    # float() accepts "nan" and "inf", so such values can get through
    # parsing; report them here instead of letting t-SNE raise.
    if not np.isfinite(data_raw).all():
        st.error("All coordinates must be finite numbers (no NaN or inf).")
        st.stop()

    if perplexity >= data_raw.shape[0]:
        st.error("Perplexity must be less than the number of points.")
        st.stop()

    embedding = run_tsne(data_raw, perplexity, seed)
    df_plot = pd.DataFrame(embedding, columns=["x", "y"])

    st.subheader("2-D embedding")
    fig = px.scatter(df_plot, x="x", y="y", width=700, height=500)
    fig.update_traces(marker=dict(size=10))
    fig.update_layout(margin=dict(l=20, r=20, t=30, b=20))
    st.plotly_chart(fig, use_container_width=True)

    with st.expander("Show raw data"):
        st.write(pd.DataFrame(data_raw))