Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -214,7 +214,6 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
214 |
"n_gram_range": (1, 3),
|
215 |
"top_n_words": 15,
|
216 |
"verbose": True,
|
217 |
-
"stop_words": ARABIC_STOP_WORDS
|
218 |
}
|
219 |
|
220 |
if topic_strategy == "Manual" and n_topics is not None:
|
@@ -223,6 +222,11 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
223 |
topic_model_params["nr_topics"] = "auto"
|
224 |
|
225 |
topic_model = BERTopic(**topic_model_params)
|
|
|
|
|
|
|
|
|
|
|
226 |
|
227 |
for country, group in df.groupby('country'):
|
228 |
progress_text = f"Processing poems for {country}..."
|
@@ -319,10 +323,7 @@ def main():
|
|
319 |
if topic_strategy == "Manual":
|
320 |
# Calculate reasonable max topics based on dataset size
|
321 |
n_documents = len(df)
|
322 |
-
|
323 |
-
max_topics = min(50, n_documents // 20)
|
324 |
-
else:
|
325 |
-
max_topics = min(500, int(np.log10(n_documents) * 100))
|
326 |
|
327 |
n_topics = st.slider(
|
328 |
"Number of Topics",
|
|
|
214 |
"n_gram_range": (1, 3),
|
215 |
"top_n_words": 15,
|
216 |
"verbose": True,
|
|
|
217 |
}
|
218 |
|
219 |
if topic_strategy == "Manual" and n_topics is not None:
|
|
|
222 |
topic_model_params["nr_topics"] = "auto"
|
223 |
|
224 |
topic_model = BERTopic(**topic_model_params)
|
225 |
+
|
226 |
+
# Create vectorizer with stop words
|
227 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
228 |
+
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
|
229 |
+
topic_model.vectorizer_model = vectorizer
|
230 |
|
231 |
for country, group in df.groupby('country'):
|
232 |
progress_text = f"Processing poems for {country}..."
|
|
|
323 |
if topic_strategy == "Manual":
|
324 |
# Calculate reasonable max topics based on dataset size
|
325 |
n_documents = len(df)
|
326 |
+
max_topics = max(2, min(50, n_documents // 20))
|
|
|
|
|
|
|
327 |
|
328 |
n_topics = st.slider(
|
329 |
"Number of Topics",
|