wilmerags committed on
Commit
a5482a3
·
1 Parent(s): 86cf6b2

fix: Remove missing and unused logging sections and test an experimental version of the labels approach

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -13,7 +13,15 @@ from bokeh.transform import factor_cmap
13
  from sklearn.manifold import TSNE
14
  from sentence_transformers import SentenceTransformer
15
 
 
 
 
 
 
 
16
  # Original implementation from: https://huggingface.co/spaces/edugp/embedding-lenses/blob/main/app.py
 
 
17
  @st.cache(show_spinner=False, allow_output_mutation=True)
18
  def load_model(model_name: str) -> SentenceTransformer:
19
  embedder = model_name
@@ -60,23 +68,16 @@ def generate_plot(
60
  ) -> Figure:
61
  with st.spinner(text="Embedding text..."):
62
  embeddings = embed_text(df, model)
63
- logger.info("Encoding labels")
64
- encoded_labels = encode_labels(labels)
65
  with st.spinner("Reducing dimensionality..."):
66
  embeddings_2d = get_tsne_embeddings(embeddings)
67
- logger.info("Generating figure")
68
  plot = draw_interactive_scatter_plot(
69
  df, embeddings_2d[:, 0], embeddings_2d[:, 1], encoded_labels.values, labels, 'text', 'label'
70
  )
71
  return plot
72
 
73
 
74
- client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
75
- model_to_use = {
76
- "English": "all-MiniLM-L12-v2",
77
- "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
78
- }
79
-
80
  st.title("Tweet-SNEst")
81
  st.write("Visualize tweets embeddings in 2D using colors for topics labels.")
82
  col1, col2 = st.columns(2)
@@ -112,6 +113,4 @@ if tw_user:
112
  labels = [0] * len(tweets_txt)
113
  # plot = generate_plot(df, text_column, label_column, sample, dimensionality_reduction_function, model)
114
  plot = generate_plot(tweets_txt, labels, model)
115
- logger.info("Displaying plot")
116
- st.bokeh_chart(plot)
117
- logger.info("Done")
 
13
  from sklearn.manifold import TSNE
14
  from sentence_transformers import SentenceTransformer
15
 
16
+ client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
17
+ model_to_use = {
18
+ "English": "all-MiniLM-L12-v2",
19
+ "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
20
+ }
21
+
22
  # Original implementation from: https://huggingface.co/spaces/edugp/embedding-lenses/blob/main/app.py
23
+ SEED = 42
24
+
25
  @st.cache(show_spinner=False, allow_output_mutation=True)
26
  def load_model(model_name: str) -> SentenceTransformer:
27
  embedder = model_name
 
68
  ) -> Figure:
69
  with st.spinner(text="Embedding text..."):
70
  embeddings = embed_text(df, model)
71
+ # encoded_labels = encode_labels(labels)
72
+ encoded_labels = labels
73
  with st.spinner("Reducing dimensionality..."):
74
  embeddings_2d = get_tsne_embeddings(embeddings)
 
75
  plot = draw_interactive_scatter_plot(
76
  df, embeddings_2d[:, 0], embeddings_2d[:, 1], encoded_labels.values, labels, 'text', 'label'
77
  )
78
  return plot
79
 
80
 
 
 
 
 
 
 
81
  st.title("Tweet-SNEst")
82
  st.write("Visualize tweets embeddings in 2D using colors for topics labels.")
83
  col1, col2 = st.columns(2)
 
113
  labels = [0] * len(tweets_txt)
114
  # plot = generate_plot(df, text_column, label_column, sample, dimensionality_reduction_function, model)
115
  plot = generate_plot(tweets_txt, labels, model)
116
+ st.bokeh_chart(plot)