Spaces:

wilmerags
/

tweet-snest

Build error

wilmerags committed on Nov 25, 2021

Commit

f0f3e26

1 Parent(s): 401a74f

feat: Switch to the L6 version of the model to improve encoding times

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ from sentence_transformers import SentenceTransformer, util
 client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
 model_to_use = {
-    "English": "all-MiniLM-L12-v2",
     "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
 }
@@ -135,12 +135,12 @@ def generate_plot(
 ) -> Figure:
     with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
         embeddings = embed_text(tws_cleaned, model)
-    # encoded_labels = encode_labels(labels)
-    cluster = hdbscan.HDBSCAN(
-        min_cluster_size=3,
-        metric='euclidean',
-        cluster_selection_method='eom'
-    ).fit(embeddings)
     encoded_labels = cluster.labels_
     cluster_keyword = {}
     with st.spinner("Now trying to express them with my own words... 💬"):

 client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
 model_to_use = {
+    "English": "all-MiniLM-L6-v2",
     "Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
 }
 ) -> Figure:
     with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
         embeddings = embed_text(tws_cleaned, model)
+        # encoded_labels = encode_labels(labels)
+        cluster = hdbscan.HDBSCAN(
+            min_cluster_size=3,
+            metric='euclidean',
+            cluster_selection_method='eom'
+        ).fit(embeddings)
     encoded_labels = cluster.labels_
     cluster_keyword = {}
     with st.spinner("Now trying to express them with my own words... 💬"):