Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -203,7 +203,7 @@ def format_emotions(emotion_counts):
|
|
203 |
})
|
204 |
return formatted_emotions
|
205 |
|
206 |
-
def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=30):
|
207 |
"""Process the data and generate summaries with flexible topic configuration."""
|
208 |
summaries = []
|
209 |
|
@@ -213,7 +213,7 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
213 |
"min_topic_size": min_topic_size,
|
214 |
"n_gram_range": (1, 3),
|
215 |
"top_n_words": 15,
|
216 |
-
"verbose": True
|
217 |
}
|
218 |
|
219 |
if topic_strategy == "Manual" and n_topics is not None:
|
@@ -222,9 +222,8 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
222 |
topic_model_params["nr_topics"] = "auto"
|
223 |
|
224 |
topic_model = BERTopic(**topic_model_params)
|
225 |
-
|
226 |
-
|
227 |
-
from sklearn.feature_extraction.text import CountVectorizer
|
228 |
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
|
229 |
topic_model.vectorizer_model = vectorizer
|
230 |
|
@@ -274,8 +273,13 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
274 |
|
275 |
return summaries, topic_model
|
276 |
|
277 |
-
# Main application logic
|
278 |
def main():
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
# Load models
|
280 |
try:
|
281 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
@@ -323,7 +327,7 @@ def main():
|
|
323 |
if topic_strategy == "Manual":
|
324 |
# Calculate reasonable max topics based on dataset size
|
325 |
n_documents = len(df)
|
326 |
-
max_topics = max(2, min(50, n_documents // 20))
|
327 |
|
328 |
n_topics = st.slider(
|
329 |
"Number of Topics",
|
@@ -359,7 +363,10 @@ def main():
|
|
359 |
if st.button("Process Data"):
|
360 |
with st.spinner("Processing your data..."):
|
361 |
summaries, topic_model = process_and_summarize(
|
362 |
-
df,
|
|
|
|
|
|
|
363 |
top_n=top_n,
|
364 |
topic_strategy=topic_strategy,
|
365 |
n_topics=n_topics if topic_strategy == "Manual" else None,
|
|
|
203 |
})
|
204 |
return formatted_emotions
|
205 |
|
206 |
+
def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=30):
|
207 |
"""Process the data and generate summaries with flexible topic configuration."""
|
208 |
summaries = []
|
209 |
|
|
|
213 |
"min_topic_size": min_topic_size,
|
214 |
"n_gram_range": (1, 3),
|
215 |
"top_n_words": 15,
|
216 |
+
"verbose": True
|
217 |
}
|
218 |
|
219 |
if topic_strategy == "Manual" and n_topics is not None:
|
|
|
222 |
topic_model_params["nr_topics"] = "auto"
|
223 |
|
224 |
topic_model = BERTopic(**topic_model_params)
|
225 |
+
|
226 |
+
# Create vectorizer with stop words
|
|
|
227 |
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
|
228 |
topic_model.vectorizer_model = vectorizer
|
229 |
|
|
|
273 |
|
274 |
return summaries, topic_model
|
275 |
|
|
|
276 |
def main():
|
277 |
+
st.set_page_config(
|
278 |
+
page_title="Arabic Poem Analysis",
|
279 |
+
page_icon="📚",
|
280 |
+
layout="wide"
|
281 |
+
)
|
282 |
+
|
283 |
# Load models
|
284 |
try:
|
285 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
|
|
327 |
if topic_strategy == "Manual":
|
328 |
# Calculate reasonable max topics based on dataset size
|
329 |
n_documents = len(df)
|
330 |
+
max_topics = max(2, min(50, n_documents // 20)) # Ensure minimum of 2
|
331 |
|
332 |
n_topics = st.slider(
|
333 |
"Number of Topics",
|
|
|
363 |
if st.button("Process Data"):
|
364 |
with st.spinner("Processing your data..."):
|
365 |
summaries, topic_model = process_and_summarize(
|
366 |
+
df,
|
367 |
+
bert_tokenizer,
|
368 |
+
bert_model,
|
369 |
+
emotion_classifier,
|
370 |
top_n=top_n,
|
371 |
topic_strategy=topic_strategy,
|
372 |
n_topics=n_topics if topic_strategy == "Manual" else None,
|