wilmerags committed on
Commit
0d13483
·
1 Parent(s): 53bad35

fix: Adding a copy of cleaned text for internal representation and keeping original text for visualization

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -115,11 +115,12 @@ def draw_interactive_scatter_plot(
115
  # Up to here
116
  def generate_plot(
117
  tws: List[str],
 
118
  model: SentenceTransformer,
119
  tw_user: str
120
  ) -> Figure:
121
  with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
122
- embeddings = embed_text(tws, model)
123
  # encoded_labels = encode_labels(labels)
124
  cluster = hdbscan.HDBSCAN(
125
  min_cluster_size=3,
@@ -173,9 +174,8 @@ if go_btn and tw_user != '':
173
  tweets_objs += tweets_response.data
174
  tweets_txt = [tweet.text for tweet in tweets_objs]
175
  tweets_txt = list(set(tweets_txt))
176
- tweets_txt = preprocess(tweets_txt)
177
- # plot = generate_plot(df, text_column, label_column, sample, dimensionality_reduction_function, model)
178
- plot = generate_plot(tweets_txt, model, tw_user)
179
  st.bokeh_chart(plot)
180
  elif go_btn and tw_user == '':
181
  st.warning('Twitter handler field is empty 🙄')
 
115
  # Up to here
116
  def generate_plot(
117
  tws: List[str],
118
+ tws_cleaned: List[str],
119
  model: SentenceTransformer,
120
  tw_user: str
121
  ) -> Figure:
122
  with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
123
+ embeddings = embed_text(tws_cleaned, model)
124
  # encoded_labels = encode_labels(labels)
125
  cluster = hdbscan.HDBSCAN(
126
  min_cluster_size=3,
 
174
  tweets_objs += tweets_response.data
175
  tweets_txt = [tweet.text for tweet in tweets_objs]
176
  tweets_txt = list(set(tweets_txt))
177
+ tweets_txt_cleaned = preprocess(tweets_txt)
178
+ plot = generate_plot(tweets_txt, tweets_txt_cleaned, model, tw_user)
 
179
  st.bokeh_chart(plot)
180
  elif go_btn and tw_user == '':
181
  st.warning('Twitter handler field is empty 🙄')