Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -73,6 +73,13 @@ form.caption("A complete list of all available metrics supported by UMAP can be
|
|
73 |
form.form_submit_button("Submit")
|
74 |
|
75 |
#BerTopic_model = BERTopic.load("my_topics_model")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
@st.cache(allow_output_mutation=True)
|
77 |
def load_model(model_name, hdbscan_model=hdbscan_model, umap_model=umap_model, vectorizer_model=vectorizer_model, use_topic_reduction = use_topic_reduction, number_of_topics = number_of_topics):
|
78 |
sentence_model = SentenceTransformer(model_name)
|
@@ -84,12 +91,7 @@ def load_model(model_name, hdbscan_model=hdbscan_model, umap_model=umap_model, v
|
|
84 |
kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, calculate_probabilities = True)
|
85 |
return kw_model
|
86 |
|
87 |
-
|
88 |
-
if use_random_seed:
|
89 |
-
umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric, random_state = 42)
|
90 |
-
else:
|
91 |
-
umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric)
|
92 |
-
vectorizer_model = CountVectorizer(lowercase = cv_lowercase, ngram_range=(cv_ngram_min, cv_ngram_max), analyzer=cv_analyzer, max_df=cv_max_df, min_df=cv_min_df, stop_words="english")
|
93 |
|
94 |
|
95 |
BerTopic_model = load_model(model_name=model_name)
|
|
|
73 |
form.form_submit_button("Submit")
|
74 |
|
75 |
#BerTopic_model = BERTopic.load("my_topics_model")
|
76 |
+
hdbscan_model = HDBSCAN(min_cluster_size=hdbscan_min_cluster_size, min_samples = hdbscan_min_samples, metric=hdbscan_metric, prediction_data=True)
|
77 |
+
if use_random_seed:
|
78 |
+
umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric, random_state = 42)
|
79 |
+
else:
|
80 |
+
umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric)
|
81 |
+
vectorizer_model = CountVectorizer(lowercase = cv_lowercase, ngram_range=(cv_ngram_min, cv_ngram_max), analyzer=cv_analyzer, max_df=cv_max_df, min_df=cv_min_df, stop_words="english")
|
82 |
+
|
83 |
@st.cache(allow_output_mutation=True)
|
84 |
def load_model(model_name, hdbscan_model=hdbscan_model, umap_model=umap_model, vectorizer_model=vectorizer_model, use_topic_reduction = use_topic_reduction, number_of_topics = number_of_topics):
|
85 |
sentence_model = SentenceTransformer(model_name)
|
|
|
91 |
kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, calculate_probabilities = True)
|
92 |
return kw_model
|
93 |
|
94 |
+
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
|
97 |
BerTopic_model = load_model(model_name=model_name)
|