HarryLee committed on
Commit
d68475c
·
1 Parent(s): f47ef77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -1,7 +1,11 @@
1
- import streamlit as st
2
  from bertopic import BERTopic
3
- import re
 
 
4
  import pandas as pd
 
 
 
5
  from sklearn.feature_extraction.text import CountVectorizer
6
 
7
  st.set_page_config(page_title='eRupt Topic Trendy (e-Commerce x Social Media)', page_icon=None, layout='centered', initial_sidebar_state='auto')
@@ -79,7 +83,15 @@ def load_model(model_name, hdbscan_model=hdbscan_model, umap_model=umap_model, v
79
  else:
80
  kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, calculate_probabilities = True)
81
  return kw_model
82
-
 
 
 
 
 
 
 
 
83
  BerTopic_model = load_model(model_name=model_name)
84
  input_text = st.text_area("Enter product topic here")
85
 
 
 
1
  from bertopic import BERTopic
2
+ import streamlit as st
3
+ import streamlit.components.v1 as components
4
+ from datasets import load_dataset
5
  import pandas as pd
6
+ from sentence_transformers import SentenceTransformer
7
+ from umap import UMAP
8
+ from hdbscan import HDBSCAN
9
  from sklearn.feature_extraction.text import CountVectorizer
10
 
11
  st.set_page_config(page_title='eRupt Topic Trendy (e-Commerce x Social Media)', page_icon=None, layout='centered', initial_sidebar_state='auto')
 
83
  else:
84
  kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, calculate_probabilities = True)
85
  return kw_model
86
+
87
+ hdbscan_model = HDBSCAN(min_cluster_size=hdbscan_min_cluster_size, min_samples = hdbscan_min_samples, metric=hdbscan_metric, prediction_data=True)
88
+ if use_random_seed:
89
+ umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric, random_state = 42)
90
+ else:
91
+ umap_model = UMAP(n_neighbors=umap_n_neighbors, n_components=umap_n_components, min_dist=umap_min_dist, metric=umap_metric)
92
+ vectorizer_model = CountVectorizer(lowercase = cv_lowercase, ngram_range=(cv_ngram_min, cv_ngram_max), analyzer=cv_analyzer, max_df=cv_max_df, min_df=cv_min_df, stop_words="english")
93
+
94
+
95
  BerTopic_model = load_model(model_name=model_name)
96
  input_text = st.text_area("Enter product topic here")
97