wilmerags committed on
Commit
f0f3e26
·
1 Parent(s): 401a74f

feat: Switching to L6 version of the model to improve times of encoding

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -22,7 +22,7 @@ from sentence_transformers import SentenceTransformer, util
22
 
23
  client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
24
  model_to_use = {
25
- "English": "all-MiniLM-L12-v2",
26
  "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
27
  }
28
 
@@ -135,12 +135,12 @@ def generate_plot(
135
  ) -> Figure:
136
  with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
137
  embeddings = embed_text(tws_cleaned, model)
138
- # encoded_labels = encode_labels(labels)
139
- cluster = hdbscan.HDBSCAN(
140
- min_cluster_size=3,
141
- metric='euclidean',
142
- cluster_selection_method='eom'
143
- ).fit(embeddings)
144
  encoded_labels = cluster.labels_
145
  cluster_keyword = {}
146
  with st.spinner("Now trying to express them with my own words... 💬"):
 
22
 
23
  client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
24
  model_to_use = {
25
+ "English": "all-MiniLM-L6-v2",
26
  "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
27
  }
28
 
 
135
  ) -> Figure:
136
  with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
137
  embeddings = embed_text(tws_cleaned, model)
138
+ # encoded_labels = encode_labels(labels)
139
+ cluster = hdbscan.HDBSCAN(
140
+ min_cluster_size=3,
141
+ metric='euclidean',
142
+ cluster_selection_method='eom'
143
+ ).fit(embeddings)
144
  encoded_labels = cluster.labels_
145
  cluster_keyword = {}
146
  with st.spinner("Now trying to express them with my own words... 💬"):