Spaces:

wilmerags
/

tweet-snest

Build error

App Files Files Community

wilmerags committed on Nov 24, 2021

Commit

a5482a3

1 Parent(s): 86cf6b2

fix: Remove missing and unused logging sections and testing exp version for labels approach

Browse files

Files changed (1) hide show

app.py +11 -12

app.py CHANGED Viewed

@@ -13,7 +13,15 @@ from bokeh.transform import factor_cmap
 from sklearn.manifold import TSNE
 from sentence_transformers import SentenceTransformer
 # Original implementation from: https://huggingface.co/spaces/edugp/embedding-lenses/blob/main/app.py
 @st.cache(show_spinner=False, allow_output_mutation=True)
 def load_model(model_name: str) -> SentenceTransformer:
     embedder = model_name
@@ -60,23 +68,16 @@ def generate_plot(
 ) -> Figure:
     with st.spinner(text="Embedding text..."):
         embeddings = embed_text(df, model)
-    logger.info("Encoding labels")
-    encoded_labels = encode_labels(labels)
     with st.spinner("Reducing dimensionality..."):
         embeddings_2d = get_tsne_embeddings(embeddings)
-    logger.info("Generating figure")
     plot = draw_interactive_scatter_plot(
         df, embeddings_2d[:, 0], embeddings_2d[:, 1], encoded_labels.values, labels, 'text', 'label'
     )
     return plot
-client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
-model_to_use = {
-    "English": "all-MiniLM-L12-v2",
-    "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
-}
 st.title("Tweet-SNEst")
 st.write("Visualize tweets embeddings in 2D using colors for topics labels.")
 col1, col2 = st.columns(2)
@@ -112,6 +113,4 @@ if tw_user:
     labels = [0] * len(tweets_txt)
     # plot = generate_plot(df, text_column, label_column, sample, dimensionality_reduction_function, model)
     plot = generate_plot(tweets_txt, labels, model)
-    logger.info("Displaying plot")
-    st.bokeh_chart(plot)
-    logger.info("Done")

 from sklearn.manifold import TSNE
 from sentence_transformers import SentenceTransformer
+client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
+model_to_use = {
+    "English": "all-MiniLM-L12-v2",
+    "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
+}
 # Original implementation from: https://huggingface.co/spaces/edugp/embedding-lenses/blob/main/app.py
+SEED = 42
 @st.cache(show_spinner=False, allow_output_mutation=True)
 def load_model(model_name: str) -> SentenceTransformer:
     embedder = model_name
 ) -> Figure:
     with st.spinner(text="Embedding text..."):
         embeddings = embed_text(df, model)
+    # encoded_labels = encode_labels(labels)
+    encoded_labels = labels
     with st.spinner("Reducing dimensionality..."):
         embeddings_2d = get_tsne_embeddings(embeddings)
     plot = draw_interactive_scatter_plot(
         df, embeddings_2d[:, 0], embeddings_2d[:, 1], encoded_labels.values, labels, 'text', 'label'
     )
     return plot
 st.title("Tweet-SNEst")
 st.write("Visualize tweets embeddings in 2D using colors for topics labels.")
 col1, col2 = st.columns(2)
     labels = [0] * len(tweets_txt)
     # plot = generate_plot(df, text_column, label_column, sample, dimensionality_reduction_function, model)
     plot = generate_plot(tweets_txt, labels, model)
+    st.bokeh_chart(plot)