Spaces:

euler314
/

tsne

Sleeping

File size: 3,574 Bytes

3ea658b

import io
import textwrap

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.manifold import TSNE
import plotly.express as px

# --------------  Example data & helper functions -----------------------------
# Built-in demo point clouds: display label -> (n_points, 3) integer array.
EXAMPLE_SHAPES = {
    # The eight corners of the unit cube, enumerated with x varying slowest
    # and z varying fastest.
    "Cube (3-D, 8 vertices)": np.array(
        [[x, y, z] for x in (0, 1) for y in (0, 1) for z in (0, 1)]
    ),
    # Four base corners in the z = 0 plane, followed by the apex at z = 1.
    "Square pyramid (3-D, 5 vertices)": np.array(
        [[bx, by, 0] for bx, by in ((-1, -1), (1, -1), (1, 1), (-1, 1))]
        + [[0, 0, 1]]
    ),
}


def parse_text_points(text: str) -> np.ndarray:
    """
    Parse a multiline string of comma- or whitespace-separated numbers
    into an (n_points, n_dims) float array.

    Each non-blank line is one point.  Commas, spaces and tabs are all
    accepted as separators; blank lines (and lines holding only separators)
    are skipped.

    Returns:
        A 2-D float array with one row per point.  Input without any
        numbers yields an empty array of shape (0, 0), so the result is
        always two-dimensional.

    Raises:
        ValueError: if a token is not a valid number, or if a line has a
            different number of values than the first point.  The message
            names the offending line, counted from the start of ``text``.
    """
    points = []
    n_dims = None
    # Iterate over the untouched text so reported line numbers match what
    # the user typed; str.split() already ignores leading/trailing blanks.
    for line_no, line in enumerate(text.splitlines(), start=1):
        tokens = line.replace(",", " ").split()
        if not tokens:
            continue
        try:
            values = [float(token) for token in tokens]
        except ValueError as err:
            raise ValueError(
                f"Line {line_no}: could not read {line.strip()!r} as numbers."
            ) from err
        if n_dims is None:
            # The first point fixes the dimensionality for all the others.
            n_dims = len(values)
        elif len(values) != n_dims:
            raise ValueError(
                f"Line {line_no}: expected {n_dims} values per point "
                f"but found {len(values)}."
            )
        points.append(values)

    if not points:
        return np.empty((0, 0), dtype=float)
    return np.array(points, dtype=float)


def run_tsne(data: np.ndarray, perplexity: float, seed: int) -> np.ndarray:
    """
    Embed ``data`` into two dimensions with scikit-learn's t-SNE.

    Args:
        data: Array of points, one row per point.
        perplexity: t-SNE perplexity, forwarded unchanged to ``TSNE``.
        seed: Value passed as ``random_state`` so runs are repeatable.

    Returns:
        Whatever ``TSNE.fit_transform`` returns for ``data`` (the 2-D
        embedding, one row per input point).
    """
    # PCA initialisation has to extract 2 components, which is impossible
    # when the points have fewer than 2 coordinates; use random
    # initialisation for such inputs instead of letting the fit fail.
    too_few_features = data.ndim == 2 and data.shape[1] < 2
    init = "random" if too_few_features else "pca"

    tsne = TSNE(
        n_components=2,
        perplexity=perplexity,
        random_state=seed,
        init=init,
    )
    return tsne.fit_transform(data)
# ---------------------------------------------------------------------------


# Page header: title plus a short Markdown introduction.
# The two trailing spaces after "2-D." are deliberate (Markdown line break).
INTRO_MARKDOWN = """
    Upload or paste your points, choose parameters, and see how
    **t-SNE** flattens them into 2-D.  
    *Example shapes* are provided for quick experimentation.
    """

st.title("🌀 t-SNE Explorer for n-D Point Clouds")
st.markdown(INTRO_MARKDOWN)

# --- Sidebar controls -------------------------------------------------------
# Sidebar: choose a data source, load the points into `data_raw`, then collect
# the t-SNE settings.  `st.stop()` halts the script when no data is available
# yet, so everything after it can rely on `data_raw` being set.
with st.sidebar:
    st.header("1️⃣  Choose data source")
    source = st.radio(
        "Data input method",
        ["Example shape", "Upload CSV/TXT", "Paste raw text"]
    )

    if source == "Example shape":
        shape_key = st.selectbox("Pick a shape", list(EXAMPLE_SHAPES.keys()))
        data_raw = EXAMPLE_SHAPES[shape_key]

    elif source == "Upload CSV/TXT":
        file = st.file_uploader("Upload coordinates file (*.csv / *.txt)")
        if file is None:
            st.stop()
        try:
            # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order
            # mark, which would otherwise corrupt the first number in the file.
            text = file.getvalue().decode("utf-8-sig")
            data_raw = parse_text_points(text)
        except ValueError as err:  # UnicodeDecodeError is a ValueError subclass
            st.error(f"Could not read the uploaded file: {err}")
            st.stop()

    else:  # Paste text
        placeholder = "e.g.\n0,0,0\n0,0,1\n0,1,0\n..."
        text = st.text_area("Paste coordinates (one point per line)", height=200, placeholder=placeholder)
        if not text.strip():
            st.stop()
        try:
            data_raw = parse_text_points(text)
        except ValueError as err:
            st.error(f"Could not parse the pasted text: {err}")
            st.stop()

    st.divider()
    st.header("2️⃣  t-SNE parameters")

    # The run step rejects any perplexity >= the number of points, so shrink
    # the slider for small inputs; a fixed 5-50 range makes the 5-vertex
    # example unusable and the 8-vertex one fail at the default of 30.
    n_points = data_raw.shape[0] if data_raw.ndim == 2 else 0
    perplexity_max = min(50.0, float(n_points - 1))
    if perplexity_max > 5.0:
        perplexity_min = 5.0
    elif perplexity_max > 1.0:
        perplexity_min = 1.0
    else:
        # Too few points for any valid setting: keep the standard range and
        # let the checks that run on "Run t-SNE" report the problem.
        perplexity_min, perplexity_max = 5.0, 50.0
    perplexity = st.slider(
        "Perplexity",
        perplexity_min,
        perplexity_max,
        min(30.0, perplexity_max),
        1.0,
    )
    seed = st.number_input("Random seed", value=42, step=1)
    run_button = st.button("Run t-SNE 🚀")

# --- Main area --------------------------------------------------------------
# Runs only on the script pass in which the sidebar button was clicked:
# validate the input, compute the embedding, then plot it.
if run_button:
    has_enough_points = data_raw.ndim == 2 and data_raw.shape[0] >= 2
    if not has_enough_points:
        st.error("Need at least two points; check your input.")
        st.stop()

    n_points = data_raw.shape[0]
    if n_points <= perplexity:
        st.error("Perplexity must be less than the number of points.")
        st.stop()

    # One row per input point, columns named for the scatter axes.
    df_plot = pd.DataFrame(
        run_tsne(data_raw, perplexity, seed),
        columns=["x", "y"],
    )

    st.subheader("2-D embedding")
    fig = (
        px.scatter(df_plot, x="x", y="y", width=700, height=500)
        .update_traces(marker={"size": 10})
        .update_layout(margin={"l": 20, "r": 20, "t": 30, "b": 20})
    )
    st.plotly_chart(fig, use_container_width=True)

    # Let the user inspect the points that went into the embedding.
    with st.expander("Show raw data"):
        st.write(pd.DataFrame(data_raw))