Spaces:

maazamjad
/

sent_analysis

Sleeping

App Files Files Community

maazamjad committed on Jun 22

Commit

2bef3bb

verified ·

1 Parent(s): b4b21b0

Update app.py

Browse files

Files changed (1) hide show

app.py +631 -608

app.py CHANGED Viewed

@@ -1,52 +1,24 @@
-# STREAMLIT ML CLASSIFICATION APP - DUAL MODEL SUPPORT
 # =====================================================
-import streamlit as st
 import pandas as pd
 import numpy as np
 import joblib
 import matplotlib.pyplot as plt
 import seaborn as sns
-# Page Configuration
-st.set_page_config(
-    page_title="ML Text Classifier",
-    page_icon="🤖",
-    layout="wide",
-    initial_sidebar_state="expanded"
-)
-# Custom CSS
-st.markdown("""
-<style>
-    .main-header {
-        font-size: 2.5rem;
-        color: #1f77b4;
-        text-align: center;
-        margin-bottom: 2rem;
-    }
-    .success-box {
-        padding: 1rem;
-        border-radius: 0.5rem;
-        background-color: #d4edda;
-        border: 1px solid #c3e6cb;
-        margin: 1rem 0;
-    }
-    .metric-card {
-        background-color: #f8f9fa;
-        padding: 1rem;
-        border-radius: 0.5rem;
-        border-left: 4px solid #007bff;
-    }
-</style>
-""", unsafe_allow_html=True)
 # ============================================================================
 # MODEL LOADING SECTION
 # ============================================================================
-@st.cache_resource
 def load_models():
     models = {}
     try:
@@ -83,379 +55,514 @@ def load_models():
         individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
         if not (pipeline_ready or individual_ready):
-            st.error("No complete model setup found!")
             return None
         return models
     except Exception as e:
-        st.error(f"Error loading models: {e}")
         return None
 # ============================================================================
-# PREDICTION FUNCTION
 # ============================================================================
-def make_prediction(text, model_choice, models):
     """Make prediction using the selected model"""
-    if models is None:
-        return None, None
     try:
         prediction = None
         probabilities = None
-        if model_choice == "pipeline" and models.get('pipeline_available'):
-            # Use the complete pipeline (Logistic Regression)
-            prediction = models['pipeline'].predict([text])[0]
-            probabilities = models['pipeline'].predict_proba([text])[0]
-        elif model_choice == "logistic_regression":
-            if models.get('pipeline_available'):
-                # Use pipeline for LR
-                prediction = models['pipeline'].predict([text])[0]
-                probabilities = models['pipeline'].predict_proba([text])[0]
-            elif models.get('vectorizer_available') and models.get('lr_available'):
                 # Use individual components
-                X = models['vectorizer'].transform([text])
-                prediction = models['logistic_regression'].predict(X)[0]
-                probabilities = models['logistic_regression'].predict_proba(X)[0]
-        elif model_choice == "naive_bayes":
-            if models.get('vectorizer_available') and models.get('nb_available'):
                 # Use individual components for NB
-                X = models['vectorizer'].transform([text])
-                prediction = models['naive_bayes'].predict(X)[0]
-                probabilities = models['naive_bayes'].predict_proba(X)[0]
         if prediction is not None and probabilities is not None:
             # Convert to readable format
             class_names = ['Negative', 'Positive']
             prediction_label = class_names[prediction]
-            return prediction_label, probabilities
         else:
-            return None, None
     except Exception as e:
-        st.error(f"Error making prediction: {e}")
-        st.error(f"Model choice: {model_choice}")
-        st.error(f"Available models: {[k for k, v in models.items() if isinstance(v, bool) and v]}")
-        return None, None
-def get_available_models(models):
     """Get list of available models for selection"""
     available = []
-    if models is None:
-        return available
-    if models.get('pipeline_available'):
-        available.append(("logistic_regression", "📈 Logistic Regression (Pipeline)"))
-    elif models.get('vectorizer_available') and models.get('lr_available'):
-        available.append(("logistic_regression", "📈 Logistic Regression (Individual)"))
-    if models.get('vectorizer_available') and models.get('nb_available'):
-        available.append(("naive_bayes", "🎯 Multinomial Naive Bayes"))
-    return available
 # ============================================================================
-# SIDEBAR NAVIGATION
 # ============================================================================
-st.sidebar.title("🧭 Navigation")
-st.sidebar.markdown("Choose what you want to do:")
-page = st.sidebar.selectbox(
-    "Select Page:",
-    ["🏠 Home", "🔮 Single Prediction", "📁 Batch Processing", "⚖️ Model Comparison", "📊 Model Info", "❓ Help"]
-)
-# Load models
-models = load_models()
-# ============================================================================
-# HOME PAGE
-# ============================================================================
-if page == "🏠 Home":
-    st.markdown('<h1 class="main-header">🤖 ML Text Classification App</h1>', unsafe_allow_html=True)
-    st.markdown("""
-    Welcome to your machine learning web application! This app demonstrates sentiment analysis
-    using multiple trained models: **Logistic Regression** and **Multinomial Naive Bayes**.
-    """)
-    # App overview
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        st.markdown("""
-        ### 🔮 Single Prediction
-        - Enter text manually
-        - Choose between models
-        - Get instant predictions
-        - See confidence scores
-        """)
-    with col2:
-        st.markdown("""
-        ### 📁 Batch Processing
-        - Upload text files
-        - Process multiple texts
-        - Compare model performance
-        - Download results
-        """)
-    with col3:
-        st.markdown("""
-        ### ⚖️ Model Comparison
-        - Compare different models
-        - Side-by-side results
-        - Agreement analysis
-        - Performance metrics
-        """)
-    # Model status
-    st.subheader("📋 Model Status")
-    if models:
-        st.success("✅ Models loaded successfully!")
-        col1, col2, col3 = st.columns(3)
-        with col1:
-            if models.get('pipeline_available'):
-                st.info("**📈 Logistic Regression**\n✅ Pipeline Available")
-            elif models.get('lr_available') and models.get('vectorizer_available'):
-                st.info("**📈 Logistic Regression**\n✅ Individual Components")
-            else:
-                st.warning("**📈 Logistic Regression**\n❌ Not Available")
-        with col2:
-            if models.get('nb_available') and models.get('vectorizer_available'):
-                st.info("**🎯 Multinomial NB**\n✅ Available")
-            else:
-                st.warning("**🎯 Multinomial NB**\n❌ Not Available")
-        with col3:
-            if models.get('vectorizer_available'):
-                st.info("**🔤 TF-IDF Vectorizer**\n✅ Available")
-            else:
-                st.warning("**🔤 TF-IDF Vectorizer**\n❌ Not Available")
-    else:
-        st.error("❌ Models not loaded. Please check model files.")
-# ============================================================================
-# SINGLE PREDICTION PAGE
-# ============================================================================
-elif page == "🔮 Single Prediction":
-    st.header("🔮 Make a Single Prediction")
-    st.markdown("Enter text below and select a model to get sentiment predictions.")
-    if models:
-        available_models = get_available_models(models)
-        if available_models:
-            # Model selection
-            model_choice = st.selectbox(
-                "Choose a model:",
-                options=[model[0] for model in available_models],
-                format_func=lambda x: next(model[1] for model in available_models if model[0] == x)
-            )
-            # Text input
-            user_input = st.text_area(
-                "Enter your text here:",
-                placeholder="Type or paste your text here (e.g., product review, feedback, comment)...",
-                height=150
-            )
-            # Character count
-            if user_input:
-                st.caption(f"Character count: {len(user_input)} | Word count: {len(user_input.split())}")
-            # Example texts
-            with st.expander("📝 Try these example texts"):
-                examples = [
-                    "This product is absolutely amazing! Best purchase I've made this year.",
-                    "Terrible quality, broke after one day. Complete waste of money.",
-                    "It's okay, nothing special but does the job.",
-                    "Outstanding customer service and fast delivery. Highly recommend!",
-                    "I love this movie! It's absolutely fantastic and entertaining."
-                ]
-                col1, col2 = st.columns(2)
-                for i, example in enumerate(examples):
-                    with col1 if i % 2 == 0 else col2:
-                        if st.button(f"Example {i+1}", key=f"example_{i}"):
-                            st.session_state.user_input = example
-                            st.rerun()
-            # Use session state for user input
-            if 'user_input' in st.session_state:
-                user_input = st.session_state.user_input
-            # Prediction button
-            if st.button("🚀 Predict", type="primary"):
-                if user_input.strip():
-                    with st.spinner('Analyzing sentiment...'):
-                        prediction, probabilities = make_prediction(user_input, model_choice, models)
-                        if prediction and probabilities is not None:
-                            # Display prediction
-                            col1, col2 = st.columns([3, 1])
-                            with col1:
-                                if prediction == "Positive":
-                                    st.success(f"🎯 Prediction: **{prediction} Sentiment**")
-                                else:
-                                    st.error(f"🎯 Prediction: **{prediction} Sentiment**")
-                            with col2:
-                                confidence = max(probabilities)
-                                st.metric("Confidence", f"{confidence:.1%}")
-                            # Create probability chart
-                            st.subheader("📊 Prediction Probabilities")
-                            # Detailed probabilities
-                            col1, col2 = st.columns(2)
-                            with col1:
-                                st.metric("😞 Negative", f"{probabilities[0]:.1%}")
-                            with col2:
-                                st.metric("😊 Positive", f"{probabilities[1]:.1%}")
-                            # Bar chart
-                            class_names = ['Negative', 'Positive']
-                            prob_df = pd.DataFrame({
-                                'Sentiment': class_names,
-                                'Probability': probabilities
-                            })
-                            st.bar_chart(prob_df.set_index('Sentiment'), height=300)
-                        else:
-                            st.error("Failed to make prediction")
-                else:
-                    st.warning("Please enter some text to classify!")
-        else:
-            st.error("No models available for prediction.")
     else:
-        st.warning("Models not loaded. Please check the model files.")
 # ============================================================================
-# BATCH PROCESSING PAGE
 # ============================================================================
-elif page == "📁 Batch Processing":
-    st.header("📁 Upload File for Batch Processing")
-    st.markdown("Upload a text file or CSV to process multiple texts at once.")
-    if models:
-        available_models = get_available_models(models)
-        if available_models:
-            # File upload
-            uploaded_file = st.file_uploader(
-                "Choose a file",
-                type=['txt', 'csv'],
-                help="Upload a .txt file (one text per line) or .csv file (text in first column)"
-            )
-            if uploaded_file:
-                # Model selection
-                model_choice = st.selectbox(
-                    "Choose model for batch processing:",
-                    options=[model[0] for model in available_models],
-                    format_func=lambda x: next(model[1] for model in available_models if model[0] == x)
                 )
-                # Process file
-                if st.button("📊 Process File"):
-                    try:
-                        # Read file content
-                        if uploaded_file.type == "text/plain":
-                            content = str(uploaded_file.read(), "utf-8")
-                            texts = [line.strip() for line in content.split('\n') if line.strip()]
-                        else:  # CSV
-                            df = pd.read_csv(uploaded_file)
-                            texts = df.iloc[:, 0].astype(str).tolist()
-                        if not texts:
-                            st.error("No text found in file")
-                        else:
-                            st.info(f"Processing {len(texts)} texts...")
-                            # Process all texts
-                            results = []
-                            progress_bar = st.progress(0)
-                            for i, text in enumerate(texts):
-                                if text.strip():
-                                    prediction, probabilities = make_prediction(text, model_choice, models)
-                                    if prediction and probabilities is not None:
-                                        results.append({
-                                            'Text': text[:100] + "..." if len(text) > 100 else text,
-                                            'Full_Text': text,
-                                            'Prediction': prediction,
-                                            'Confidence': f"{max(probabilities):.1%}",
-                                            'Negative_Prob': f"{probabilities[0]:.1%}",
-                                            'Positive_Prob': f"{probabilities[1]:.1%}"
-                                        })
-                                progress_bar.progress((i + 1) / len(texts))
-                            if results:
-                                # Display results
-                                st.success(f"✅ Processed {len(results)} texts successfully!")
-                                results_df = pd.DataFrame(results)
-                                # Summary statistics
-                                st.subheader("📊 Summary Statistics")
-                                col1, col2, col3, col4 = st.columns(4)
-                                positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
-                                negative_count = len(results) - positive_count
-                                avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])
-                                with col1:
-                                    st.metric("Total Processed", len(results))
-                                with col2:
-                                    st.metric("😊 Positive", positive_count)
-                                with col3:
-                                    st.metric("😞 Negative", negative_count)
-                                with col4:
-                                    st.metric("Avg Confidence", f"{avg_confidence:.1f}%")
-                                # Results preview
-                                st.subheader("📋 Results Preview")
-                                st.dataframe(
-                                    results_df[['Text', 'Prediction', 'Confidence']],
-                                    use_container_width=True
-                                )
-                                # Download option
-                                csv = results_df.to_csv(index=False)
-                                st.download_button(
-                                    label="📥 Download Full Results",
-                                    data=csv,
-                                    file_name=f"predictions_{model_choice}_{uploaded_file.name}.csv",
-                                    mime="text/csv"
-                                )
-                            else:
-                                st.error("No valid texts could be processed")
-                    except Exception as e:
-                        st.error(f"Error processing file: {e}")
-            else:
-                st.info("Please upload a file to get started.")
-                # Show example file formats
-                with st.expander("📄 Example File Formats"):
-                    st.markdown("""
                     **Text File (.txt):**
                     ```
                     This product is amazing!
@@ -470,274 +577,190 @@ elif page == "📁 Batch Processing":
                     "Poor quality, not satisfied",review
                     ```
                     """)
-        else:
-            st.error("No models available for batch processing.")
-    else:
-        st.warning("Models not loaded. Please check the model files.")
-# ============================================================================
-# MODEL COMPARISON PAGE
-# ============================================================================
-elif page == "⚖️ Model Comparison":
-    st.header("⚖️ Compare Models")
-    st.markdown("Compare predictions from different models on the same text.")
-    if models:
-        available_models = get_available_models(models)
-        if len(available_models) >= 2:
-            # Text input for comparison
-            comparison_text = st.text_area(
-                "Enter text to compare models:",
-                placeholder="Enter text to see how different models perform...",
-                height=100
-            )
-            if st.button("📊 Compare All Models") and comparison_text.strip():
-                st.subheader("🔍 Model Comparison Results")
-                # Get predictions from all available models
-                comparison_results = []
-                for model_key, model_name in available_models:
-                    prediction, probabilities = make_prediction(comparison_text, model_key, models)
-                    if prediction and probabilities is not None:
-                        comparison_results.append({
-                            'Model': model_name,
-                            'Prediction': prediction,
-                            'Confidence': f"{max(probabilities):.1%}",
-                            'Negative %': f"{probabilities[0]:.1%}",
-                            'Positive %': f"{probabilities[1]:.1%}",
-                            'Raw_Probs': probabilities
-                        })
-                if comparison_results:
-                    # Comparison table
-                    comparison_df = pd.DataFrame(comparison_results)
-                    st.table(comparison_df[['Model', 'Prediction', 'Confidence', 'Negative %', 'Positive %']])
-                    # Agreement analysis
-                    predictions = [r['Prediction'] for r in comparison_results]
-                    if len(set(predictions)) == 1:
-                        st.success(f"✅ All models agree: **{predictions[0]} Sentiment**")
                     else:
-                        st.warning("⚠️ Models disagree on prediction")
-                        for result in comparison_results:
-                            model_name = result['Model'].split(' ')[1] if ' ' in result['Model'] else result['Model']
-                            st.write(f"- {model_name}: {result['Prediction']}")
-                    # Side-by-side probability charts
-                    st.subheader("📊 Detailed Probability Comparison")
-                    cols = st.columns(len(comparison_results))
-                    for i, result in enumerate(comparison_results):
-                        with cols[i]:
-                            model_name = result['Model']
-                            st.write(f"**{model_name}**")
-                            chart_data = pd.DataFrame({
-                                'Sentiment': ['Negative', 'Positive'],
-                                'Probability': result['Raw_Probs']
-                            })
-                            st.bar_chart(chart_data.set_index('Sentiment'))
-                else:
-                    st.error("Failed to get predictions from models")
-        elif len(available_models) == 1:
-            st.info("Only one model available. Use Single Prediction page for detailed analysis.")
-        else:
-            st.error("No models available for comparison.")
-    else:
-        st.warning("Models not loaded. Please check the model files.")
-# ============================================================================
-# MODEL INFO PAGE
-# ============================================================================
-elif page == "📊 Model Info":
-    st.header("📊 Model Information")
-    if models:
-        st.success("✅ Models are loaded and ready!")
-        # Model details
-        st.subheader("🔧 Available Models")
-        col1, col2 = st.columns(2)
-        with col1:
-            st.markdown("""
-            ### 📈 Logistic Regression
-            **Type:** Linear Classification Model
-            **Algorithm:** Logistic Regression with L2 regularization
-            **Features:** TF-IDF vectors (unigrams + bigrams)
-            **Strengths:**
-            - Fast prediction
-            - Interpretable coefficients
-            - Good baseline performance
-            - Handles sparse features well
-            """)
-        with col2:
-            st.markdown("""
-            ### 🎯 Multinomial Naive Bayes
-            **Type:** Probabilistic Classification Model
-            **Algorithm:** Multinomial Naive Bayes
-            **Features:** TF-IDF vectors (unigrams + bigrams)
-            **Strengths:**
-            - Fast training and prediction
-            - Works well with small datasets
-            - Good performance on text classification
-            - Natural probabilistic outputs
-            """)
-        # Feature engineering info
-        st.subheader("🔤 Feature Engineering")
-        st.markdown("""
-        **Vectorization:** TF-IDF (Term Frequency-Inverse Document Frequency)
-        - **Max Features:** 5,000 most important terms
-        - **N-grams:** Unigrams (1-word) and Bigrams (2-word phrases)
-        - **Min Document Frequency:** 2 (terms must appear in at least 2 documents)
-        - **Stop Words:** English stop words removed
-        """)
-        # File status
-        st.subheader("📁 Model Files Status")
-        file_status = []
-        files_to_check = [
-            ("sentiment_analysis_pipeline.pkl", "Complete LR Pipeline", models.get('pipeline_available', False)),
-            ("tfidf_vectorizer.pkl", "TF-IDF Vectorizer", models.get('vectorizer_available', False)),
-            ("logistic_regression_model.pkl", "LR Classifier", models.get('lr_available', False)),
-            ("multinomial_nb_model.pkl", "NB Classifier", models.get('nb_available', False))
-        ]
-        for filename, description, status in files_to_check:
-            file_status.append({
-                "File": filename,
-                "Description": description,
-                "Status": "✅ Loaded" if status else "❌ Not Found"
-            })
-        st.table(pd.DataFrame(file_status))
-        # Training information
-        st.subheader("📚 Training Information")
-        st.markdown("""
-        **Dataset:** Product Review Sentiment Analysis
-        - **Classes:** Positive and Negative sentiment
-        - **Preprocessing:** Text cleaning, tokenization, TF-IDF vectorization
-        - **Training:** Both models trained on same feature set for fair comparison
         """)
-    else:
-        st.warning("Models not loaded. Please check model files in the 'models/' directory.")
 # ============================================================================
-# HELP PAGE
 # ============================================================================
-elif page == "❓ Help":
-    st.header("❓ How to Use This App")
-    with st.expander("🔮 Single Prediction"):
-        st.write("""
-        1. **Select a model** from the dropdown (Logistic Regression or Multinomial Naive Bayes)
-        2. **Enter text** in the text area (product reviews, comments, feedback)
-        3. **Click 'Predict'** to get sentiment analysis results
-        4. **View results:** prediction, confidence score, and probability breakdown
-        5. **Try examples:** Use the provided example texts to test the models
-        """)
-    with st.expander("📁 Batch Processing"):
-        st.write("""
-        1. **Prepare your file:**
-           - **.txt file:** One text per line
-           - **.csv file:** Text in the first column
-        2. **Upload the file** using the file uploader
-        3. **Select a model** for processing
-        4. **Click 'Process File'** to analyze all texts
-        5. **Download results** as CSV file with predictions and probabilities
-        """)
-    with st.expander("⚖️ Model Comparison"):
-        st.write("""
-        1. **Enter text** you want to analyze
-        2. **Click 'Compare All Models'** to get predictions from both models
-        3. **View comparison table** showing predictions and confidence scores
-        4. **Analyze agreement:** See if models agree or disagree
-        5. **Compare probabilities:** Side-by-side probability charts
-        """)
-    with st.expander("🔧 Troubleshooting"):
-        st.write("""
-        **Common Issues and Solutions:**
-        **Models not loading:**
-        - Ensure model files (.pkl) are in the 'models/' directory
-        - Check that required files exist:
-          - tfidf_vectorizer.pkl (required)
-          - sentiment_analysis_pipeline.pkl (for LR pipeline)
-          - logistic_regression_model.pkl (for LR individual)
-          - multinomial_nb_model.pkl (for NB model)
-        **Prediction errors:**
-        - Make sure input text is not empty
-        - Try shorter texts if getting memory errors
-        - Check that text contains readable characters
-        **File upload issues:**
-        - Ensure file format is .txt or .csv
-        - Check file encoding (should be UTF-8)
-        - Verify CSV has text in the first column
-        """)
-    # System information
-    st.subheader("💻 Your Project Structure")
-    st.code("""
-    streamlit_ml_app/
-    ├── app.py                              # Main application
-    ├── requirements.txt                    # Dependencies
-    ├── models/                            # Model files
-    │   ├── sentiment_analysis_pipeline.pkl # LR complete pipeline
-    │   ├── tfidf_vectorizer.pkl           # Feature extraction
-    │   ├── logistic_regression_model.pkl  # LR classifier
-    │   └── multinomial_nb_model.pkl       # NB classifier
-    └── sample_data/                       # Sample files
-        ├── sample_texts.txt
-        └── sample_data.csv
-    """)
-# ============================================================================
-# FOOTER
-# ============================================================================
-st.sidebar.markdown("---")
-st.sidebar.markdown("### 📚 App Information")
-st.sidebar.info("""
-**ML Text Classification App**
-Built with Streamlit
-**Models:**
-- 📈 Logistic Regression
-- 🎯 Multinomial Naive Bayes
-**Framework:** scikit-learn
-**Deployment:** Streamlit Cloud Ready
-""")
-st.markdown("---")
-st.markdown("""
-<div style='text-align: center; color: #666666;'>
-    Built with ❤️ using Streamlit | Machine Learning Text Classification Demo | By Maaz Amjad<br>
-    <small>As a part of the courses series **Introduction to Large Language Models/Intro to AI Agents**</small><br>
-    <small>This app demonstrates sentiment analysis using trained ML models</small>
-</div>
-""", unsafe_allow_html=True)

+# GRADIO ML CLASSIFICATION APP - DUAL MODEL SUPPORT
 # =====================================================
+import gradio as gr
 import pandas as pd
 import numpy as np
 import joblib
 import matplotlib.pyplot as plt
 import seaborn as sns
+import io
+import base64
+from typing import Tuple, List, Optional
+import warnings
+warnings.filterwarnings('ignore')
 # ============================================================================
 # MODEL LOADING SECTION
 # ============================================================================
 def load_models():
+    """Load all available ML models"""
     models = {}
     try:
         individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
         if not (pipeline_ready or individual_ready):
             return None
         return models
     except Exception as e:
+        print(f"Error loading models: {e}")
         return None
+# Load models globally
+MODELS = load_models()
 # ============================================================================
+# PREDICTION FUNCTIONS
 # ============================================================================
+def make_prediction(text: str, model_choice: str) -> Tuple[Optional[str], Optional[np.ndarray], str]:
     """Make prediction using the selected model"""
+    # Returns a (label, probabilities, status) triple. label and probabilities
+    # are None whenever no prediction is produced; status is always a message
+    # string describing the outcome.
+    # Guard clauses: bail out before touching any model.
+    if MODELS is None:
+        return None, None, "❌ No models loaded!"
+    if not text or not text.strip():
+        return None, None, "⚠️ Please enter some text!"
     try:
         prediction = None
         probabilities = None
+        # For Logistic Regression the pipeline is tried first and is fed the raw
+        # text; the fallback vectorizes the text and calls the standalone
+        # classifier on the resulting features.
+        if model_choice == "Logistic Regression":
+            if MODELS.get('pipeline_available'):
+                # Use the complete pipeline (Logistic Regression)
+                prediction = MODELS['pipeline'].predict([text])[0]
+                probabilities = MODELS['pipeline'].predict_proba([text])[0]
+            elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                 # Use individual components
+                X = MODELS['vectorizer'].transform([text])
+                prediction = MODELS['logistic_regression'].predict(X)[0]
+                probabilities = MODELS['logistic_regression'].predict_proba(X)[0]
+        elif model_choice == "Multinomial Naive Bayes":
+            if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                 # Use individual components for NB
+                X = MODELS['vectorizer'].transform([text])
+                prediction = MODELS['naive_bayes'].predict(X)[0]
+                probabilities = MODELS['naive_bayes'].predict_proba(X)[0]
         if prediction is not None and probabilities is not None:
             # Convert to readable format
+            # NOTE(review): the lookup below assumes the model emits integer
+            # labels with 0 = Negative and 1 = Positive — confirm against the
+            # training code. A non-integer label would raise here and be
+            # reported through the except branch.
             class_names = ['Negative', 'Positive']
             prediction_label = class_names[prediction]
+            status = f"✅ Prediction successful!"
+            return prediction_label, probabilities, status
         else:
+            # Reached when model_choice matched no branch above, or when the
+            # components that branch needs are not loaded.
+            return None, None, f"❌ Model '{model_choice}' not available!"
     except Exception as e:
+        # Failures are reported through the status string rather than raised.
+        return None, None, f"❌ Error making prediction: {str(e)}"
def get_available_models() -> List[str]:
    """Return the display names of the models that can currently be used.

    Logistic Regression is offered when either the complete pipeline or the
    vectorizer + LR classifier pair is available; Multinomial Naive Bayes
    needs the vectorizer + NB classifier pair. When nothing is usable the
    single placeholder entry ``"No models available"`` is returned.
    """
    placeholder = ["No models available"]
    if MODELS is None:
        return placeholder

    lr_ready = MODELS.get('pipeline_available') or (
        MODELS.get('vectorizer_available') and MODELS.get('lr_available')
    )
    nb_ready = MODELS.get('vectorizer_available') and MODELS.get('nb_available')

    candidates = (
        ("Logistic Regression", lr_ready),
        ("Multinomial Naive Bayes", nb_ready),
    )
    usable = [name for name, ready in candidates if ready]
    return usable or placeholder
def create_probability_plot(probabilities: np.ndarray) -> plt.Figure:
    """Draw a bar chart of the negative/positive class probabilities.

    Args:
        probabilities: Two values ordered (negative, positive); each is drawn
            as one bar and labelled with its percentage.

    Returns:
        The Matplotlib figure containing the chart.
    """
    # Build the figure directly instead of through plt.subplots(): pyplot keeps
    # a reference to every figure it creates until it is explicitly closed, so
    # a long-running server would accumulate one leaked figure per prediction.
    fig = plt.Figure(figsize=(8, 5))
    ax = fig.subplots()

    classes = ['Negative 😞', 'Positive 😊']
    colors = ['#ff6b6b', '#51cf66']
    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8, edgecolor='white', linewidth=2)

    # Add percentage labels just above each bar
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold', fontsize=12)

    # Leave headroom above 1.0 so the label of a full-height bar stays visible
    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Probability', fontsize=12, fontweight='bold')
    ax.set_title('Sentiment Prediction Probabilities', fontsize=14, fontweight='bold', pad=20)
    ax.grid(axis='y', alpha=0.3)

    # Style improvements
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_facecolor('#f8f9fa')

    fig.tight_layout()
    return fig
 # ============================================================================
+# INTERFACE FUNCTIONS
 # ============================================================================
def predict_single_text(text: str, model_choice: str) -> Tuple[str, str, str, str, Optional[plt.Figure]]:
    """Run one prediction and format it for the single-prediction tab.

    Returns five values in display order: result text, confidence text,
    per-class probability details, a confidence interpretation and the
    probability plot. On failure the first value is the status message from
    ``make_prediction``, the next three are empty strings and the plot is
    ``None``.
    """
    prediction, probabilities, status = make_prediction(text, model_choice)
    if not prediction or probabilities is None:
        return status, "", "", "", None

    # Confidence is the probability of the most likely class.
    confidence = max(probabilities)

    result_text = f"🎯 **Prediction: {prediction} Sentiment**"
    confidence_text = f"🎯 **Confidence: {confidence:.1%}**"

    # Per-class breakdown
    prob_details = (
        "\n"
        "        📊 **Detailed Probabilities:**\n"
        f"        - 😞 Negative: {probabilities[0]:.1%}\n"
        f"        - 😊 Positive: {probabilities[1]:.1%}\n"
        "        "
    )

    # Confidence tiers, checked from the highest threshold down.
    tiers = (
        (0.8, "🔥 **High Confidence**: The model is very confident about this prediction."),
        (0.6, "✅ **Medium Confidence**: The model is reasonably confident about this prediction."),
    )
    interpretation = next(
        (message for threshold, message in tiers if confidence >= threshold),
        "⚠️ **Low Confidence**: The model is uncertain. Consider the context carefully.",
    )

    chart = create_probability_plot(probabilities)
    return result_text, confidence_text, prob_details, interpretation, chart
def _read_batch_texts(file) -> Optional[List[str]]:
    """Extract the non-empty, stripped texts from a ``.txt`` or ``.csv`` upload.

    ``file`` may be a path (``str`` / ``os.PathLike``) or an object exposing the
    path as ``.name``. Returns ``None`` when the extension is not supported.
    """
    import os

    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
        source = path
    else:
        path = file.name
        # Prefer the file on disk; fall back to the object itself for
        # in-memory file-likes whose name is not a real path.
        source = path if os.path.isfile(path) else file

    extension = os.path.splitext(path)[1].lower()
    if extension == '.txt':
        if isinstance(source, str):
            # utf-8-sig reads plain UTF-8 and also drops a leading BOM.
            with open(source, encoding='utf-8-sig') as handle:
                content = handle.read()
        else:
            raw = source.read()
            content = raw.decode('utf-8-sig') if isinstance(raw, bytes) else raw
        candidates = content.splitlines()
    elif extension == '.csv':
        # Text lives in the first column; empty cells are dropped so they are
        # not classified as the literal string "nan".
        candidates = pd.read_csv(source).iloc[:, 0].dropna().astype(str).tolist()
    else:
        return None

    return [stripped for stripped in (item.strip() for item in candidates) if stripped]


def process_batch_file(file, model_choice: str, max_texts: int = 100) -> Tuple[str, Optional[str]]:
    """Predict sentiment for every text in an uploaded file.

    Args:
        file: The upload, either a path or an object whose ``.name`` is the
            path. ``.txt`` files hold one text per line; ``.csv`` files hold
            the text in their first column.
        model_choice: Model name passed through to ``make_prediction``.
        max_texts: Upper bound on the number of texts processed; extra texts
            are dropped and the summary says so.

    Returns:
        ``(summary, csv_string)``. ``summary`` is Markdown text and
        ``csv_string`` holds the per-text results as CSV. On any failure
        ``summary`` is the error message and ``csv_string`` is ``None``.
    """
    if file is None:
        return "⚠️ Please upload a file!", None
    if MODELS is None:
        return "❌ No models loaded!", None

    try:
        texts = _read_batch_texts(file)
        if texts is None:
            return "❌ Unsupported file format! Please use .txt or .csv files.", None
        if not texts:
            return "❌ No text found in file!", None

        # UI sliders may deliver the limit as a float; slicing needs an int.
        limit = int(max_texts)
        truncated = len(texts) > limit
        texts = texts[:limit]

        rows = []
        confidences = []
        for index, text in enumerate(texts, start=1):
            prediction, probabilities, _ = make_prediction(text, model_choice)
            if not prediction or probabilities is None:
                continue
            confidence = float(max(probabilities))
            confidences.append(confidence)
            rows.append({
                'Index': index,
                'Text': text[:100] + "..." if len(text) > 100 else text,
                'Prediction': prediction,
                'Confidence': f"{confidence:.1%}",
                'Negative_Prob': f"{probabilities[0]:.1%}",
                'Positive_Prob': f"{probabilities[1]:.1%}",
            })

        if not rows:
            return "❌ No valid texts could be processed!", None

        total = len(rows)
        positive_count = sum(1 for row in rows if row['Prediction'] == 'Positive')
        negative_count = total - positive_count

        # Build the summary from unindented lines: Markdown renders lines
        # indented by four or more spaces as a code block.
        summary_lines = []
        if truncated:
            summary_lines += [f"⚠️ Processing limited to {limit} texts due to size constraints.", ""]
        summary_lines += [
            f"✅ **Successfully processed {total} texts!**",
            "",
            "📊 **Summary Statistics:**",
            f"- Total Processed: {total}",
            f"- 😊 Positive: {positive_count} ({positive_count / total:.1%})",
            f"- 😞 Negative: {negative_count} ({negative_count / total:.1%})",
            # Averaged from the raw values, not the rounded display strings.
            f"- Average Confidence: {np.mean(confidences):.1%}",
        ]

        csv_string = pd.DataFrame(rows).to_csv(index=False)
        return "\n".join(summary_lines), csv_string
    except Exception as e:
        # UI boundary: surface the failure as a message instead of raising.
        return f"❌ Error processing file: {str(e)}", None
def compare_models(text: str) -> Tuple[str, Optional[plt.Figure]]:
    """Run ``text`` through every available model and compare the results.

    Args:
        text: The text to classify with each model.

    Returns:
        ``(comparison_text, figure)``. ``comparison_text`` is Markdown listing
        each model's prediction, confidence and class probabilities, followed
        by an agreement note. ``figure`` holds one probability bar chart per
        model, side by side. When a comparison is not possible the first value
        is an explanatory message and the figure is ``None``.
    """
    if MODELS is None:
        return "❌ No models loaded!", None
    if not text or not text.strip():
        return "⚠️ Please enter some text to compare!", None

    available_models = get_available_models()
    if len(available_models) < 2:
        return "ℹ️ Need at least 2 models for comparison. Only one model available.", None

    # Collect one result per model; models that fail to predict are skipped.
    comparison_results = []
    for model_name in available_models:
        prediction, probabilities, _ = make_prediction(text, model_name)
        if prediction and probabilities is not None:
            comparison_results.append({
                'Model': model_name,
                'Prediction': prediction,
                'Confidence': f"{max(probabilities):.1%}",
                'Negative %': f"{probabilities[0]:.1%}",
                'Positive %': f"{probabilities[1]:.1%}",
                'Raw_Probs': probabilities
            })

    if not comparison_results:
        return "❌ Failed to get predictions from models!", None

    # Per-model breakdown
    text_parts = ["🔍 **Model Comparison Results:**\n\n"]
    for result in comparison_results:
        text_parts.append(f"**{result['Model']}:**\n")
        text_parts.append(f"- Prediction: {result['Prediction']}\n")
        text_parts.append(f"- Confidence: {result['Confidence']}\n")
        text_parts.append(f"- Negative: {result['Negative %']}, Positive: {result['Positive %']}\n\n")

    # Agreement analysis
    predictions = [result['Prediction'] for result in comparison_results]
    if len(set(predictions)) == 1:
        text_parts.append(f"✅ **Perfect Agreement**: All models predict **{predictions[0]} Sentiment**")
    else:
        text_parts.append("⚠️ **Models Disagree** on prediction:\n")
        for result in comparison_results:
            text_parts.append(f"- {result['Model']}: {result['Prediction']}\n")
    comparison_text = "".join(text_parts)

    # Side-by-side plot. The figure is built directly instead of through
    # plt.subplots(): pyplot keeps every figure it creates alive until it is
    # closed, which would leak one figure per comparison in a running server.
    panel_count = len(comparison_results)
    fig = plt.Figure(figsize=(6 * panel_count, 5))
    # squeeze=False always yields a 2-D axes array, so a single panel needs no
    # special case.
    axes = fig.subplots(1, panel_count, squeeze=False)[0]

    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']
    for ax, result in zip(axes, comparison_results):
        bars = ax.bar(classes, result['Raw_Probs'], color=colors, alpha=0.8)

        # Add percentage labels
        for bar, prob in zip(bars, result['Raw_Probs']):
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height + 0.02,
                    f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')

        ax.set_ylim(0, 1.1)
        ax.set_title(f"{result['Model']}\n{result['Prediction']}", fontweight='bold')
        ax.grid(axis='y', alpha=0.3)

        # Style
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

    fig.tight_layout()
    return comparison_text, fig
def get_model_info() -> str:
    """Describe the loaded models and model-file status as Markdown.

    Returns:
        Markdown text. When no models are loaded it lists the files that are
        expected in the ``models/`` directory. Otherwise it describes each
        available model, the feature engineering, which model files were
        found, and the training setup.

    Every line is emitted without leading indentation: Markdown renders lines
    indented by four or more spaces as a code block, which would hide the bold
    headings and bullet lists.
    """
    if MODELS is None:
        return "\n".join([
            "❌ **No models loaded!**",
            "Please ensure you have the following files in the 'models/' directory:",
            "- sentiment_analysis_pipeline.pkl (complete pipeline), OR",
            "- tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR",
            "- tfidf_vectorizer.pkl + multinomial_nb_model.pkl",
        ])

    sections = [
        "✅ **Models are loaded and ready!**",
        "🔧 **Available Models:**",
    ]

    # Available models
    if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
        sections.append("\n".join([
            "**📈 Logistic Regression**",
            "- Type: Linear Classification Model",
            "- Algorithm: Logistic Regression with L2 regularization",
            "- Features: TF-IDF vectors (unigrams + bigrams)",
            "- Strengths: Fast prediction, interpretable, good baseline",
        ]))
    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
        sections.append("\n".join([
            "**🎯 Multinomial Naive Bayes**",
            "- Type: Probabilistic Classification Model",
            "- Algorithm: Multinomial Naive Bayes",
            "- Features: TF-IDF vectors (unigrams + bigrams)",
            "- Strengths: Fast training, works with small datasets",
        ]))

    # Feature engineering
    sections.append("\n".join([
        "🔤 **Feature Engineering:**",
        "- Vectorization: TF-IDF (Term Frequency-Inverse Document Frequency)",
        "- Max Features: 5,000 most important terms",
        "- N-grams: Unigrams (1-word) and Bigrams (2-word phrases)",
        "- Min Document Frequency: 2 (terms must appear in at least 2 documents)",
        "- Stop Words: English stop words removed",
    ]))

    # File status
    files_to_check = [
        ("sentiment_analysis_pipeline.pkl", "Complete LR Pipeline", MODELS.get('pipeline_available', False)),
        ("tfidf_vectorizer.pkl", "TF-IDF Vectorizer", MODELS.get('vectorizer_available', False)),
        ("logistic_regression_model.pkl", "LR Classifier", MODELS.get('lr_available', False)),
        ("multinomial_nb_model.pkl", "NB Classifier", MODELS.get('nb_available', False)),
    ]
    status_lines = [
        f"- {filename}: {description} {'✅' if status else '❌'}"
        for filename, description, status in files_to_check
    ]
    sections.append("📁 **Model Files Status:**\n\n" + "\n".join(status_lines))

    sections.append("\n".join([
        "📚 **Training Information:**",
        "- Dataset: Product Review Sentiment Analysis",
        "- Classes: Positive and Negative sentiment",
        "- Preprocessing: Text cleaning, tokenization, TF-IDF vectorization",
        "- Training: Both models trained on same feature set for fair comparison",
    ]))

    # Blank lines between sections keep each heading and list a separate block.
    return "\n\n".join(sections) + "\n"
 # ============================================================================
+# GRADIO INTERFACE
 # ============================================================================
+def create_interface():
+    """Create the main Gradio interface"""
+    # Custom CSS for better styling
+    css = """
+    .gradio-container {
+        font-family: 'Arial', sans-serif;
+    }
+    .main-header {
+        text-align: center;
+        color: #1f77b4;
+        font-size: 2.5rem;
+        margin-bottom: 1rem;
+    }
+    .tab-nav {
+        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
+    }
+    """
+    with gr.Blocks(css=css, title="ML Text Classification App", theme=gr.themes.Soft()) as app:
+        # Header
+        gr.HTML("""
+        <div class="main-header">
+            <h1>🤖 ML Text Classification App</h1>
+            <p style="font-size: 1.2rem; color: #666;">
+                Advanced Sentiment Analysis with Multiple ML Models
+            </p>
+        </div>
+        """)
+        # Main tabbed interface
+        with gr.Tabs():
+            # ============================================================================
+            # SINGLE PREDICTION TAB
+            # ============================================================================
+            with gr.Tab("🔮 Single Prediction"):
+                gr.Markdown("### Enter text below and select a model to get sentiment predictions")
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        model_dropdown = gr.Dropdown(
+                            choices=get_available_models(),
+                            value=get_available_models()[0] if get_available_models() else None,
+                            label="Choose a model",
+                            info="Select the ML model for prediction"
+                        )
+                        text_input = gr.Textbox(
+                            lines=5,
+                            placeholder="Type or paste your text here (e.g., product review, feedback, comment)...",
+                            label="Enter your text here",
+                            info="Enter any text you want to analyze for sentiment"
+                        )
+                        # Example texts
+                        with gr.Row():
+                            example_btn1 = gr.Button("Example 1", size="sm")
+                            example_btn2 = gr.Button("Example 2", size="sm")
+                            example_btn3 = gr.Button("Example 3", size="sm")
+                        predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary", size="lg")
+                    with gr.Column(scale=2):
+                        prediction_result = gr.Markdown(label="Prediction Result")
+                        confidence_result = gr.Markdown(label="Confidence")
+                        prob_details = gr.Markdown(label="Detailed Probabilities")
+                        interpretation = gr.Markdown(label="Interpretation")
+                with gr.Row():
+                    prob_plot = gr.Plot(label="Probability Visualization")
+                # Example text handlers
+                example_btn1.click(
+                    lambda: "This product is absolutely amazing! Best purchase I've made this year.",
+                    outputs=text_input
+                )
+                example_btn2.click(
+                    lambda: "Terrible quality, broke after one day. Complete waste of money.",
+                    outputs=text_input
+                )
+                example_btn3.click(
+                    lambda: "It's okay, nothing special but does the job.",
+                    outputs=text_input
                 )
+                # Prediction handler
+                predict_btn.click(
+                    predict_single_text,
+                    inputs=[text_input, model_dropdown],
+                    outputs=[prediction_result, confidence_result, prob_details, interpretation, prob_plot]
+                )
+            # ============================================================================
+            # BATCH PROCESSING TAB
+            # ============================================================================
+            with gr.Tab("📁 Batch Processing"):
+                gr.Markdown("### Upload a text file or CSV to process multiple texts at once")
+                with gr.Row():
+                    with gr.Column():
+                        file_upload = gr.File(
+                            label="Choose a file",
+                            file_types=[".txt", ".csv"],
+                            info="Upload a .txt file (one text per line) or .csv file (text in first column)"
+                        )
+                        batch_model_dropdown = gr.Dropdown(
+                            choices=get_available_models(),
+                            value=get_available_models()[0] if get_available_models() else None,
+                            label="Choose model for batch processing"
+                        )
+                        max_texts_slider = gr.Slider(
+                            minimum=10,
+                            maximum=1000,
+                            value=100,
+                            step=10,
+                            label="Maximum texts to process",
+                            info="Limit processing for performance"
+                        )
+                        process_btn = gr.Button("📊 Process File", variant="primary", size="lg")
+                    with gr.Column():
+                        batch_results = gr.Markdown(label="Processing Results")
+                        download_file = gr.File(
+                            label="Download Results",
+                            visible=False
+                        )
+                # File format examples
+                with gr.Accordion("📄 Example File Formats", open=False):
+                    gr.Markdown("""
                     **Text File (.txt):**
                     ```
                     This product is amazing!
                     "Poor quality, not satisfied",review
                     ```
                     """)
+                # Batch processing handler
+                def handle_batch_processing(file, model_choice, max_texts):
+                    summary, csv_data = process_batch_file(file, model_choice, max_texts)
+                    if csv_data:
+                        # Save CSV data to a temporary file for download
+                        csv_file = gr.File(value=io.StringIO(csv_data), visible=True)
+                        return summary, csv_file
                     else:
+                        return summary, gr.File(visible=False)
+                process_btn.click(
+                    handle_batch_processing,
+                    inputs=[file_upload, batch_model_dropdown, max_texts_slider],
+                    outputs=[batch_results, download_file]
+                )
+            # ============================================================================
+            # MODEL COMPARISON TAB
+            # ============================================================================
+            with gr.Tab("⚖️ Model Comparison"):
+                gr.Markdown("### Compare predictions from different models on the same text")
+                with gr.Row():
+                    with gr.Column():
+                        comparison_text = gr.Textbox(
+                            lines=4,
+                            placeholder="Enter text to see how different models perform...",
+                            label="Enter text to compare models",
+                            info="Try texts with mixed sentiment for interesting comparisons"
+                        )
+                        compare_btn = gr.Button("🔍 Compare All Models", variant="primary", size="lg")
+                        # Quick examples for comparison
+                        with gr.Row():
+                            comp_ex1 = gr.Button("Mixed Example 1", size="sm")
+                            comp_ex2 = gr.Button("Mixed Example 2", size="sm")
+                            comp_ex3 = gr.Button("Mixed Example 3", size="sm")
+                    with gr.Column():
+                        comparison_results = gr.Markdown(label="Comparison Results")
+                with gr.Row():
+                    comparison_plot = gr.Plot(label="Model Comparison Visualization")
+                # Comparison example handlers
+                comp_ex1.click(
+                    lambda: "This movie was okay but not great.",
+                    outputs=comparison_text
+                )
+                comp_ex2.click(
+                    lambda: "The product is fine, I guess.",
+                    outputs=comparison_text
+                )
+                comp_ex3.click(
+                    lambda: "Could be better, could be worse.",
+                    outputs=comparison_text
+                )
+                # Comparison handler
+                compare_btn.click(
+                    compare_models,
+                    inputs=comparison_text,
+                    outputs=[comparison_results, comparison_plot]
+                )
+            # ============================================================================
+            # MODEL INFO TAB
+            # ============================================================================
+            with gr.Tab("📊 Model Info"):
+                model_info_display = gr.Markdown(
+                    value=get_model_info(),
+                    label="Model Information"
+                )
+                refresh_info_btn = gr.Button("🔄 Refresh Info", size="sm")
+                refresh_info_btn.click(
+                    get_model_info,
+                    outputs=model_info_display
+                )
+            # ============================================================================
+            # HELP TAB
+            # ============================================================================
+            with gr.Tab("❓ Help"):
+                gr.Markdown("""
+                ## 📚 How to Use This App
+                ### 🔮 Single Prediction
+                1. **Select a model** from the dropdown (Logistic Regression or Multinomial Naive Bayes)
+                2. **Enter text** in the text area (product reviews, comments, feedback)
+                3. **Click 'Analyze Sentiment'** to get sentiment analysis results
+                4. **View results:** prediction, confidence score, and probability breakdown
+                5. **Try examples:** Use the provided example buttons to test the models
+                ### 📁 Batch Processing
+                1. **Prepare your file:**
+                   - **.txt file:** One text per line
+                   - **.csv file:** Text in the first column
+                2. **Upload the file** using the file uploader
+                3. **Select a model** for processing
+                4. **Adjust max texts** slider if needed
+                5. **Click 'Process File'** to analyze all texts
+                6. **Download results** as CSV file with predictions and probabilities
+                ### ⚖️ Model Comparison
+                1. **Enter text** you want to analyze
+                2. **Click 'Compare All Models'** to get predictions from both models
+                3. **View comparison results** showing predictions and confidence scores
+                4. **Analyze agreement:** See if models agree or disagree
+                5. **Compare visualizations:** Side-by-side probability charts
+                ### 🔧 Troubleshooting
+                **Models not loading:**
+                - Ensure model files (.pkl) are in the 'models/' directory
+                - Check that required files exist:
+                  - tfidf_vectorizer.pkl (required)
+                  - sentiment_analysis_pipeline.pkl (for LR pipeline)
+                  - logistic_regression_model.pkl (for LR individual)
+                  - multinomial_nb_model.pkl (for NB model)
+                **Prediction errors:**
+                - Make sure input text is not empty
+                - Try shorter texts if getting memory errors
+                - Check that text contains readable characters
+                **File upload issues:**
+                - Ensure file format is .txt or .csv
+                - Check file encoding (should be UTF-8)
+                - Verify CSV has text in the first column
+                ### 💻 Project Structure
+                ```
+                gradio_ml_app/
+                ├── app.py                              # Main application
+                ├── requirements.txt                    # Dependencies
+                ├── models/                            # Model files
+                │   ├── sentiment_analysis_pipeline.pkl # LR complete pipeline
+                │   ├── tfidf_vectorizer.pkl           # Feature extraction
+                │   ├── logistic_regression_model.pkl  # LR classifier
+                │   └── multinomial_nb_model.pkl       # NB classifier
+                └── sample_data/                       # Sample files
+                    ├── sample_texts.txt
+                    └── sample_data.csv
+                ```
+                """)
+        # Footer
+        gr.HTML("""
+        <div style='text-align: center; color: #666666; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee;'>
+            <p><strong>🤖 ML Text Classification App</strong></p>
+            <p>Built with ❤️ using Gradio | Machine Learning Text Classification Demo | By Maaz Amjad</p>
+            <p><small>As a part of the courses series <strong>Introduction to Large Language Models/Intro to AI Agents</strong></small></p>
+            <p><small>This app demonstrates sentiment analysis using trained ML models</small></p>
+        </div>
         """)
+    return app
 # ============================================================================
+# MAIN EXECUTION
 # ============================================================================
if __name__ == "__main__":
    # Report which models are usable before the UI comes up.
    if MODELS is not None:
        available_models = get_available_models()
        print(f"✅ Successfully loaded {len(available_models)} model(s): {', '.join(available_models)}")
    else:
        print("⚠️ Warning: No models loaded!")
        print("Please ensure you have the required model files in the 'models/' directory.")

    # Build the Gradio app, then start the server.
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,       # Gradio's default port
        inbrowser=True,         # open a browser tab on start
        share=False,            # no public share link
        show_error=True,        # show detailed errors in the UI
        debug=True,             # debug mode on
    )