Spaces:

kambris
/

SoLProject

Runtime error

App Files Community

kambris committed on Nov 24, 2024

Commit

95436ee

verified ·

1 Parent(s): e2c8b5b

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -8

app.py CHANGED Viewed

@@ -203,7 +203,7 @@ def format_emotions(emotion_counts):
         })
     return formatted_emotions
-def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=30):
     """Process the data and generate summaries with flexible topic configuration."""
     summaries = []
@@ -213,7 +213,7 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
         "min_topic_size": min_topic_size,
         "n_gram_range": (1, 3),
         "top_n_words": 15,
-        "verbose": True,
     }
     if topic_strategy == "Manual" and n_topics is not None:
@@ -222,9 +222,8 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
         topic_model_params["nr_topics"] = "auto"
     topic_model = BERTopic(**topic_model_params)
-        # Create vectorizer with stop words
-    from sklearn.feature_extraction.text import CountVectorizer
     vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
     topic_model.vectorizer_model = vectorizer
@@ -274,8 +273,13 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
     return summaries, topic_model
-# Main application logic
 def main():
     # Load models
     try:
         bert_tokenizer, bert_model, emotion_classifier = load_models()
@@ -323,7 +327,7 @@ def main():
                 if topic_strategy == "Manual":
                     # Calculate reasonable max topics based on dataset size
                     n_documents = len(df)
-                    max_topics = max(2, min(50, n_documents // 20))
                     n_topics = st.slider(
                         "Number of Topics",
@@ -359,7 +363,10 @@ def main():
             if st.button("Process Data"):
                 with st.spinner("Processing your data..."):
                     summaries, topic_model = process_and_summarize(
-                        df,
                         top_n=top_n,
                         topic_strategy=topic_strategy,
                         n_topics=n_topics if topic_strategy == "Manual" else None,

         })
     return formatted_emotions
+def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=30):
     """Process the data and generate summaries with flexible topic configuration."""
     summaries = []
         "min_topic_size": min_topic_size,
         "n_gram_range": (1, 3),
         "top_n_words": 15,
+        "verbose": True
     }
     if topic_strategy == "Manual" and n_topics is not None:
         topic_model_params["nr_topics"] = "auto"
     topic_model = BERTopic(**topic_model_params)
+    # Create vectorizer with stop words
     vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
     topic_model.vectorizer_model = vectorizer
     return summaries, topic_model
 def main():
+    st.set_page_config(
+        page_title="Arabic Poem Analysis",
+        page_icon="📚",
+        layout="wide"
+    )
     # Load models
     try:
         bert_tokenizer, bert_model, emotion_classifier = load_models()
                 if topic_strategy == "Manual":
                     # Calculate reasonable max topics based on dataset size
                     n_documents = len(df)
+                    max_topics = max(2, min(50, n_documents // 20))  # Ensure minimum of 2
                     n_topics = st.slider(
                         "Number of Topics",
             if st.button("Process Data"):
                 with st.spinner("Processing your data..."):
                     summaries, topic_model = process_and_summarize(
+                        df,
+                        bert_tokenizer,
+                        bert_model,
+                        emotion_classifier,
                         top_n=top_n,
                         topic_strategy=topic_strategy,
                         n_topics=n_topics if topic_strategy == "Manual" else None,