Spaces:

kambris
/

SoLProject

Runtime error

App Files Files Community

kambris commited on Nov 24, 2024

Commit

afa7452

verified ·

1 Parent(s): f963213

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -124

app.py CHANGED Viewed

@@ -271,138 +271,135 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
     return summaries, topic_model
-# Load models
-try:
-    bert_tokenizer, bert_model, emotion_classifier = load_models()
-    st.success("Models loaded successfully!")
-except Exception as e:
-    st.error(f"Error loading models: {str(e)}")
-    st.stop()
-# Main app interface
-st.title("📚 Arabic Poem Analysis")
-st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
-# File upload
-uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
-if uploaded_file is not None:
-    try:
-        # Read the file
-        if uploaded_file.name.endswith('.csv'):
-            df = pd.read_csv(uploaded_file)
-        else:
-            df = pd.read_excel(uploaded_file)
-        # Validate columns
-        required_columns = ['country', 'poem']
-        if not all(col in df.columns for col in required_columns):
-            st.error("File must contain 'country' and 'poem' columns.")
-            st.stop()
-        # Clean data
-        df['country'] = df['country'].str.strip()
-        df = df.dropna(subset=['country', 'poem'])
-        # Add topic modeling controls
-        st.subheader("Topic Modeling Settings")
-        col1, col2 = st.columns(2)
-        with col1:
-            topic_strategy = st.radio(
-                "Topic Number Strategy",
-                ["Auto", "Manual"],
-                help="Choose whether to let the model determine the optimal number of topics or set it manually"
-            )
-            if topic_strategy == "Manual":
-                # Calculate reasonable max topics based on dataset size
-                n_documents = len(df)
-                if n_documents < 1000:
-                    max_topics = min(50, n_documents // 20)
-                else:
-                    max_topics = min(500, int(np.log10(n_documents) * 100))
-                n_topics = st.slider(
-                    "Number of Topics",
-                    min_value=2,
-                    max_value=max_topics,
-                    value=min(20, max_topics),
-                    help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
                 )
-                st.info(f"""
-                    💡 For your dataset of {n_documents:,} documents:
-                    - Minimum topics: 2
-                    - Maximum topics: {max_topics}
-                    - Recommended range: {max(2, max_topics//5)}-{max_topics//2}
-                    """)
-        with col2:
-            top_n = st.number_input(
-                "Number of top topics/emotions to display:",
-                min_value=1,
-                max_value=100,
-                value=10
-            )
-            min_topic_size = st.slider(
-                "Minimum Topic Size",
-                min_value=10,
-                max_value=100,
-                value=30,
-                help="Minimum number of documents required to form a topic"
-            )
-    except Exception as e:
-            st.error(f"Error loading models: {str(e)}")
-            st.stop()
-if st.button("Process Data"):
-            with st.spinner("Processing your data..."):
-                summaries, topic_model = process_and_summarize(df, top_n=top_n, topic_strategy=topic_strategy, n_topics=n_topics, min_topic_size=min_topic_size)
-                if summaries:
-                    st.success("Analysis complete!")
-                    # Display results in tabs
-                    tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
-                    with tab1:
-                        for summary in summaries:
-                            with st.expander(f"📍 {summary['country']} ({summary['total_poems']} poems)"):
-                                col1, col2 = st.columns(2)
-                                with col1:
-                                    st.subheader("Top Topics")
-                                    for topic in summary['top_topics']:
-                                        st.write(f"• {topic['topic']}: {topic['count']} poems")
-                                with col2:
-                                    st.subheader("Emotions")
-                                    for emotion in summary['top_emotions']:
-                                        st.write(f"• {emotion['emotion']}: {emotion['count']} poems")
-                    with tab2:
-                        st.subheader("Global Topic Distribution")
-                        topic_info = topic_model.get_topic_info()
-                        for _, row in topic_info.iterrows():
-                            if row['Topic'] == -1:
-                                topic_name = "Miscellaneous"
-                            else:
-                                words = topic_model.get_topic(row['Topic'])
-                                topic_name = " | ".join([word for word, _ in words[:5]])
-                            st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")
-    except Exception as e:
-        st.error(f"Error processing file: {str(e)}")
-else:
-    st.info("👆 Upload a file to get started!")
-    # Example format
-    st.write("### Expected File Format:")
-    example_df = pd.DataFrame({
-        'country': ['Egypt', 'Palestine'],
-        'poem': ['قصيدة مصرية', 'قصيدة فلسطينية ']
-    })
-    st.dataframe(example_df)

     return summaries, topic_model
+# Main application logic
+def main():
+    # Load models
+    try:
+        bert_tokenizer, bert_model, emotion_classifier = load_models()
+        st.success("Models loaded successfully!")
+    except Exception as e:
+        st.error(f"Error loading models: {str(e)}")
+        st.stop()
+    # Main app interface
+    st.title("📚 Arabic Poem Analysis")
+    st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
+    # File upload
+    uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
+    if uploaded_file is not None:
+        try:
+            # Read the file
+            if uploaded_file.name.endswith('.csv'):
+                df = pd.read_csv(uploaded_file)
+            else:
+                df = pd.read_excel(uploaded_file)
+            # Validate columns
+            required_columns = ['country', 'poem']
+            if not all(col in df.columns for col in required_columns):
+                st.error("File must contain 'country' and 'poem' columns.")
+                st.stop()
+            # Clean data
+            df['country'] = df['country'].str.strip()
+            df = df.dropna(subset=['country', 'poem'])
+            # Add topic modeling controls
+            st.subheader("Topic Modeling Settings")
+            col1, col2 = st.columns(2)
+            with col1:
+                topic_strategy = st.radio(
+                    "Topic Number Strategy",
+                    ["Auto", "Manual"],
+                    help="Choose whether to let the model determine the optimal number of topics or set it manually"
                 )
+                if topic_strategy == "Manual":
+                    n_documents = len(df)
+                    max_topics = min(500, int(np.log10(n_documents) * 100))
+                    n_topics = st.slider(
+                        "Number of Topics",
+                        min_value=2,
+                        max_value=max_topics,
+                        value=min(20, max_topics),
+                        help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
+                    )
+            with col2:
+                top_n = st.number_input(
+                    "Number of top topics/emotions to display:",
+                    min_value=1,
+                    max_value=100,
+                    value=10
+                )
+                min_topic_size = st.slider(
+                    "Minimum Topic Size",
+                    min_value=10,
+                    max_value=100,
+                    value=30,
+                    help="Minimum number of documents required to form a topic"
+                )
+            if st.button("Process Data"):
+                with st.spinner("Processing your data..."):
+                    summaries, topic_model = process_and_summarize(
+                        df,
+                        top_n=top_n,
+                        topic_strategy=topic_strategy,
+                        n_topics=n_topics if topic_strategy == "Manual" else None,
+                        min_topic_size=min_topic_size
+                    )
+                    if summaries:
+                        st.success("Analysis complete!")
+                        # Display results in tabs
+                        tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
+                        with tab1:
+                            for summary in summaries:
+                                with st.expander(f"📍 {summary['country']} ({summary['total_poems']} poems)"):
+                                    col1, col2 = st.columns(2)
+                                    with col1:
+                                        st.subheader("Top Topics")
+                                        for topic in summary['top_topics']:
+                                            st.write(f"• {topic['topic']}: {topic['count']} poems")
+                                    with col2:
+                                        st.subheader("Emotions")
+                                        for emotion in summary['top_emotions']:
+                                            st.write(f"• {emotion['emotion']}: {emotion['count']} poems")
+                        with tab2:
+                            st.subheader("Global Topic Distribution")
+                            topic_info = topic_model.get_topic_info()
+                            for _, row in topic_info.iterrows():
+                                if row['Topic'] == -1:
+                                    topic_name = "Miscellaneous"
+                                else:
+                                    words = topic_model.get_topic(row['Topic'])
+                                    topic_name = " | ".join([word for word, _ in words[:5]])
+                                st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")
+        except Exception as e:
+            st.error(f"Error processing file: {str(e)}")
+    else:
+        st.info("👆 Upload a file to get started!")
+        # Example format
+        st.write("### Expected File Format:")
+        example_df = pd.DataFrame({
+            'country': ['Egypt', 'Palestine'],
+            'poem': ['قصيدة مصرية', 'قصيدة فلسطينية']
+        })
+        st.dataframe(example_df)
+if __name__ == "__main__":
+    main()