HarryLee committed on
Commit
2f1230c
·
1 Parent(s): d68475c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -73,6 +73,13 @@ form.caption("A complete list of all available metrics supported by UMAP can be
73
  form.form_submit_button("Submit")
74
 
75
  #BerTopic_model = BERTopic.load("my_topics_model")
 
 
 
 
 
 
 
76
  @st.cache(allow_output_mutation=True)
77
  def load_model(model_name, hdbscan_model=hdbscan_model, umap_model=umap_model, vectorizer_model=vectorizer_model, use_topic_reduction = use_topic_reduction, number_of_topics = number_of_topics):
78
  sentence_model = SentenceTransformer(model_name)
@@ -84,12 +91,7 @@ def load_model(model_name, hdbscan_model=hdbscan_model, umap_model=umap_model, v
84
  kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, calculate_probabilities = True)
85
  return kw_model
86
 
87
- hdbscan_model = HDBSCAN(min_cluster_size=hdbscan_min_cluster_size, min_samples = hdbscan_min_samples, metric=hdbscan_metric, prediction_data=True)
88
- if use_random_seed:
89
- umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric, random_state = 42)
90
- else:
91
- umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric)
92
- vectorizer_model = CountVectorizer(lowercase = cv_lowercase, ngram_range=(cv_ngram_min, cv_ngram_max), analyzer=cv_analyzer, max_df=cv_max_df, min_df=cv_min_df, stop_words="english")
93
 
94
 
95
  BerTopic_model = load_model(model_name=model_name)
 
73
  form.form_submit_button("Submit")
74
 
75
  #BerTopic_model = BERTopic.load("my_topics_model")
76
+ hdbscan_model = HDBSCAN(min_cluster_size=hdbscan_min_cluster_size, min_samples = hdbscan_min_samples, metric=hdbscan_metric, prediction_data=True)
77
+ if use_random_seed:
78
+ umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric, random_state = 42)
79
+ else:
80
+ umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric)
81
+ vectorizer_model = CountVectorizer(lowercase = cv_lowercase, ngram_range=(cv_ngram_min, cv_ngram_max), analyzer=cv_analyzer, max_df=cv_max_df, min_df=cv_min_df, stop_words="english")
82
+
83
  @st.cache(allow_output_mutation=True)
84
  def load_model(model_name, hdbscan_model=hdbscan_model, umap_model=umap_model, vectorizer_model=vectorizer_model, use_topic_reduction = use_topic_reduction, number_of_topics = number_of_topics):
85
  sentence_model = SentenceTransformer(model_name)
 
91
  kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, calculate_probabilities = True)
92
  return kw_model
93
 
94
+
 
 
 
 
 
95
 
96
 
97
  BerTopic_model = load_model(model_name=model_name)