kambris committed
Commit 95436ee · verified · 1 Parent(s): e2c8b5b

Update app.py

Files changed (1)
  1. app.py +15 -8
app.py CHANGED
@@ -203,7 +203,7 @@ def format_emotions(emotion_counts):
         })
     return formatted_emotions
 
-def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=30):
+def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=30):
     """Process the data and generate summaries with flexible topic configuration."""
     summaries = []
 
@@ -213,7 +213,7 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
         "min_topic_size": min_topic_size,
         "n_gram_range": (1, 3),
         "top_n_words": 15,
-        "verbose": True,
+        "verbose": True
     }
 
     if topic_strategy == "Manual" and n_topics is not None:
@@ -222,9 +222,8 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
         topic_model_params["nr_topics"] = "auto"
 
     topic_model = BERTopic(**topic_model_params)
-
-    # Create vectorizer with stop words
-    from sklearn.feature_extraction.text import CountVectorizer
+
+    # Create vectorizer with stop words
     vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
     topic_model.vectorizer_model = vectorizer
 
@@ -274,8 +273,13 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
 
     return summaries, topic_model
 
-# Main application logic
 def main():
+    st.set_page_config(
+        page_title="Arabic Poem Analysis",
+        page_icon="📚",
+        layout="wide"
+    )
+
     # Load models
     try:
         bert_tokenizer, bert_model, emotion_classifier = load_models()
@@ -323,7 +327,7 @@ def main():
     if topic_strategy == "Manual":
         # Calculate reasonable max topics based on dataset size
         n_documents = len(df)
-        max_topics = max(2, min(50, n_documents // 20))
+        max_topics = max(2, min(50, n_documents // 20))  # Ensure minimum of 2
 
         n_topics = st.slider(
             "Number of Topics",
@@ -359,7 +363,10 @@
     if st.button("Process Data"):
        with st.spinner("Processing your data..."):
            summaries, topic_model = process_and_summarize(
-               df,
+               df,
+               bert_tokenizer,
+               bert_model,
+               emotion_classifier,
               top_n=top_n,
               topic_strategy=topic_strategy,
               n_topics=n_topics if topic_strategy == "Manual" else None,
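
For context, here is a small, self-contained sketch of the topic-model setup the updated process_and_summarize() converges on. It is illustrative, not a copy of app.py: build_topic_model() is a hypothetical helper, ARABIC_STOP_WORDS below is only a placeholder subset of the app's full stop-word list, and the "Manual" assignment of nr_topics is an assumption, since those lines fall outside the visible hunk.

# Illustrative sketch only -- not the app's code.
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer

ARABIC_STOP_WORDS = {"في", "من", "على", "إلى", "عن"}  # placeholder subset

def build_topic_model(topic_strategy="Auto", n_topics=None, min_topic_size=30):
    # Mirror the keyword arguments visible in the diff.
    params = {
        "min_topic_size": min_topic_size,
        "n_gram_range": (1, 3),
        "top_n_words": 15,
        "verbose": True,
        # "Manual" pins the topic count; otherwise BERTopic reduces topics itself.
        # (The Manual branch body is outside the hunk, so this fill is an assumption.)
        "nr_topics": n_topics if (topic_strategy == "Manual" and n_topics is not None) else "auto",
    }
    model = BERTopic(**params)
    # Attach a vectorizer that drops Arabic stop words, as the updated code does
    # right after construction; CountVectorizer is now used via a module-level
    # import, which is presumably why the inline sklearn import was removed.
    model.vectorizer_model = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
    return model

if __name__ == "__main__":
    topic_model = build_topic_model(topic_strategy="Manual", n_topics=8)
    print(topic_model.nr_topics)  # -> 8
    # Fitting requires a real corpus of poems:
    # topics, probs = topic_model.fit_transform(poem_texts)

The other half of the commit threads bert_tokenizer, bert_model, and emotion_classifier through process_and_summarize() rather than leaving them as module-level globals, which keeps the function's dependencies explicit, and moves st.set_page_config() to the top of main(), where Streamlit expects it to run before other st.* calls.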