Spaces:

maazamjad
/

sent_analysis

Sleeping

App Files Files Community

maazamjad committed on Jun 22

Commit

9cbecf4

verified ·

1 Parent(s): 2bef3bb

Update app.py

Browse files

Files changed (1) hide show

app.py +220 -435

app.py CHANGED Viewed

@@ -1,20 +1,20 @@
-# GRADIO ML CLASSIFICATION APP - DUAL MODEL SUPPORT
-# =====================================================
 import gradio as gr
 import pandas as pd
 import numpy as np
 import joblib
 import matplotlib.pyplot as plt
-import seaborn as sns
-import io
-import base64
-from typing import Tuple, List, Optional
 import warnings
 warnings.filterwarnings('ignore')
 # ============================================================================
-# MODEL LOADING SECTION
 # ============================================================================
 def load_models():
@@ -22,42 +22,39 @@ def load_models():
     models = {}
     try:
-        # Load the main pipeline (Logistic Regression)
         try:
             models['pipeline'] = joblib.load('models/sentiment_analysis_pipeline.pkl')
             models['pipeline_available'] = True
-        except FileNotFoundError:
             models['pipeline_available'] = False
-        # Load TF-IDF vectorizer
         try:
             models['vectorizer'] = joblib.load('models/tfidf_vectorizer.pkl')
             models['vectorizer_available'] = True
-        except FileNotFoundError:
             models['vectorizer_available'] = False
-        # Load Logistic Regression model
         try:
             models['logistic_regression'] = joblib.load('models/logistic_regression_model.pkl')
             models['lr_available'] = True
-        except FileNotFoundError:
             models['lr_available'] = False
-        # Load Multinomial Naive Bayes model
         try:
             models['naive_bayes'] = joblib.load('models/multinomial_nb_model.pkl')
             models['nb_available'] = True
-        except FileNotFoundError:
             models['nb_available'] = False
-        # Check if at least one complete setup is available
         pipeline_ready = models['pipeline_available']
         individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
-        if not (pipeline_ready or individual_ready):
-            return None
-        return models
     except Exception as e:
         print(f"Error loading models: {e}")
@@ -67,93 +64,77 @@ def load_models():
 MODELS = load_models()
 # ============================================================================
-# PREDICTION FUNCTIONS
 # ============================================================================
-def make_prediction(text: str, model_choice: str) -> Tuple[Optional[str], Optional[np.ndarray], str]:
-    """Make prediction using the selected model"""
     if MODELS is None:
-        return None, None, "❌ No models loaded!"
-    if not text or not text.strip():
-        return None, None, "⚠️ Please enter some text!"
     try:
-        prediction = None
-        probabilities = None
         if model_choice == "Logistic Regression":
             if MODELS.get('pipeline_available'):
-                # Use the complete pipeline (Logistic Regression)
                 prediction = MODELS['pipeline'].predict([text])[0]
                 probabilities = MODELS['pipeline'].predict_proba([text])[0]
             elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
-                # Use individual components
                 X = MODELS['vectorizer'].transform([text])
                 prediction = MODELS['logistic_regression'].predict(X)[0]
                 probabilities = MODELS['logistic_regression'].predict_proba(X)[0]
         elif model_choice == "Multinomial Naive Bayes":
             if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
-                # Use individual components for NB
                 X = MODELS['vectorizer'].transform([text])
                 prediction = MODELS['naive_bayes'].predict(X)[0]
                 probabilities = MODELS['naive_bayes'].predict_proba(X)[0]
-        if prediction is not None and probabilities is not None:
-            # Convert to readable format
-            class_names = ['Negative', 'Positive']
-            prediction_label = class_names[prediction]
-            status = f"✅ Prediction successful!"
-            return prediction_label, probabilities, status
-        else:
-            return None, None, f"❌ Model '{model_choice}' not available!"
     except Exception as e:
-        return None, None, f"❌ Error making prediction: {str(e)}"
-def get_available_models() -> List[str]:
-    """Get list of available models for selection"""
-    if MODELS is None:
-        return ["No models available"]
-    available = []
-    if MODELS.get('pipeline_available'):
-        available.append("Logistic Regression")
-    elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
-        available.append("Logistic Regression")
-    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
-        available.append("Multinomial Naive Bayes")
-    return available if available else ["No models available"]
-def create_probability_plot(probabilities: np.ndarray) -> plt.Figure:
-    """Create a probability visualization"""
     fig, ax = plt.subplots(figsize=(8, 5))
-    classes = ['Negative 😞', 'Positive 😊']
     colors = ['#ff6b6b', '#51cf66']
-    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8, edgecolor='white', linewidth=2)
-    # Add percentage labels on bars
     for bar, prob in zip(bars, probabilities):
         height = bar.get_height()
         ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
-                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold', fontsize=12)
     ax.set_ylim(0, 1.1)
-    ax.set_ylabel('Probability', fontsize=12, fontweight='bold')
-    ax.set_title('Sentiment Prediction Probabilities', fontsize=14, fontweight='bold', pad=20)
     ax.grid(axis='y', alpha=0.3)
-    # Style improvements
-    ax.spines['top'].set_visible(False)
-    ax.spines['right'].set_visible(False)
-    ax.set_facecolor('#f8f9fa')
     plt.tight_layout()
     return fig
@@ -161,7 +142,7 @@ def create_probability_plot(probabilities: np.ndarray) -> plt.Figure:
 # INTERFACE FUNCTIONS
 # ============================================================================
-def predict_single_text(text: str, model_choice: str) -> Tuple[str, str, str, str, Optional[plt.Figure]]:
     """Single text prediction interface"""
     prediction, probabilities, status = make_prediction(text, model_choice)
@@ -169,63 +150,56 @@ def predict_single_text(text: str, model_choice: str) -> Tuple[str, str, str, st
         confidence = max(probabilities)
         # Format results
-        result_text = f"🎯 **Prediction: {prediction} Sentiment**"
-        confidence_text = f"🎯 **Confidence: {confidence:.1%}**"
-        # Detailed probabilities
-        prob_details = f"""
-        📊 **Detailed Probabilities:**
-        - 😞 Negative: {probabilities[0]:.1%}
-        - 😊 Positive: {probabilities[1]:.1%}
-        """
-        # Confidence interpretation
         if confidence >= 0.8:
-            interpretation = "🔥 **High Confidence**: The model is very confident about this prediction."
         elif confidence >= 0.6:
-            interpretation = "✅ **Medium Confidence**: The model is reasonably confident about this prediction."
         else:
-            interpretation = "⚠️ **Low Confidence**: The model is uncertain. Consider the context carefully."
         # Create plot
-        plot = create_probability_plot(probabilities)
-        return result_text, confidence_text, prob_details, interpretation, plot
     else:
-        return status, "", "", "", None
-def process_batch_file(file, model_choice: str, max_texts: int = 100) -> Tuple[str, Optional[str]]:
-    """Process batch file for multiple predictions"""
     if file is None:
-        return "⚠️ Please upload a file!", None
     if MODELS is None:
-        return "❌ No models loaded!", None
     try:
-        # Read file content
         if file.name.endswith('.txt'):
-            content = file.read().decode('utf-8')
             texts = [line.strip() for line in content.split('\n') if line.strip()]
         elif file.name.endswith('.csv'):
-            df = pd.read_csv(file)
             texts = df.iloc[:, 0].astype(str).tolist()
         else:
-            return "❌ Unsupported file format! Please use .txt or .csv files.", None
         if not texts:
-            return "❌ No text found in file!", None
-        # Limit number of texts
         if len(texts) > max_texts:
             texts = texts[:max_texts]
-            status_msg = f"⚠️ Processing limited to {max_texts} texts due to size constraints.\n"
-        else:
-            status_msg = ""
-        # Process all texts
         results = []
         for i, text in enumerate(texts):
             if text.strip():
                 prediction, probabilities, _ = make_prediction(text, model_choice)
@@ -241,526 +215,337 @@ def process_batch_file(file, model_choice: str, max_texts: int = 100) -> Tuple[s
                     })
         if results:
-            # Create results DataFrame
-            results_df = pd.DataFrame(results)
-            # Generate summary
             positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
             negative_count = len(results) - positive_count
             avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])
-            summary = f"""
-            {status_msg}✅ **Successfully processed {len(results)} texts!**
-            📊 **Summary Statistics:**
-            - Total Processed: {len(results)}
-            - 😊 Positive: {positive_count} ({positive_count/len(results):.1%})
-            - 😞 Negative: {negative_count} ({negative_count/len(results):.1%})
-            - Average Confidence: {avg_confidence:.1f}%
-            """
-            # Convert DataFrame to CSV string for download
-            csv_string = results_df.to_csv(index=False)
-            return summary, csv_string
         else:
-            return "❌ No valid texts could be processed!", None
     except Exception as e:
-        return f"❌ Error processing file: {str(e)}", None
-def compare_models(text: str) -> Tuple[str, Optional[plt.Figure]]:
     """Compare predictions from different models"""
     if MODELS is None:
-        return "❌ No models loaded!", None
-    if not text or not text.strip():
-        return "⚠️ Please enter some text to compare!", None
     available_models = get_available_models()
     if len(available_models) < 2:
-        return "ℹ️ Need at least 2 models for comparison. Only one model available.", None
-    comparison_results = []
     for model_name in available_models:
         prediction, probabilities, _ = make_prediction(text, model_name)
         if prediction and probabilities is not None:
-            comparison_results.append({
                 'Model': model_name,
                 'Prediction': prediction,
                 'Confidence': f"{max(probabilities):.1%}",
-                'Negative %': f"{probabilities[0]:.1%}",
-                'Positive %': f"{probabilities[1]:.1%}",
-                'Raw_Probs': probabilities
             })
-    if comparison_results:
         # Create comparison text
-        comparison_text = "🔍 **Model Comparison Results:**\n\n"
-        for result in comparison_results:
             comparison_text += f"**{result['Model']}:**\n"
             comparison_text += f"- Prediction: {result['Prediction']}\n"
             comparison_text += f"- Confidence: {result['Confidence']}\n"
-            comparison_text += f"- Negative: {result['Negative %']}, Positive: {result['Positive %']}\n\n"
         # Agreement analysis
-        predictions = [r['Prediction'] for r in comparison_results]
         if len(set(predictions)) == 1:
-            comparison_text += f"✅ **Perfect Agreement**: All models predict **{predictions[0]} Sentiment**"
         else:
-            comparison_text += "⚠️ **Models Disagree** on prediction:\n"
-            for result in comparison_results:
-                comparison_text += f"- {result['Model']}: {result['Prediction']}\n"
-        # Create side-by-side comparison plot
-        fig, axes = plt.subplots(1, len(comparison_results), figsize=(6*len(comparison_results), 5))
-        if len(comparison_results) == 1:
             axes = [axes]
-        for i, result in enumerate(comparison_results):
             ax = axes[i]
             classes = ['Negative', 'Positive']
             colors = ['#ff6b6b', '#51cf66']
-            bars = ax.bar(classes, result['Raw_Probs'], color=colors, alpha=0.8)
-            # Add percentage labels
-            for bar, prob in zip(bars, result['Raw_Probs']):
                 height = bar.get_height()
                 ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
                        f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')
             ax.set_ylim(0, 1.1)
-            ax.set_title(f"{result['Model']}\n{result['Prediction']}", fontweight='bold')
             ax.grid(axis='y', alpha=0.3)
-            # Style
-            ax.spines['top'].set_visible(False)
-            ax.spines['right'].set_visible(False)
         plt.tight_layout()
         return comparison_text, fig
     else:
-        return "❌ Failed to get predictions from models!", None
-def get_model_info() -> str:
-    """Get model information and status"""
     if MODELS is None:
         return """
-        ❌ **No models loaded!**
-        Please ensure you have the following files in the 'models/' directory:
         - sentiment_analysis_pipeline.pkl (complete pipeline), OR
         - tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR
         - tfidf_vectorizer.pkl + multinomial_nb_model.pkl
         """
-    info_text = "✅ **Models are loaded and ready!**\n\n"
-    # Available models
-    info_text += "🔧 **Available Models:**\n\n"
     if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
-        info_text += """
-        **📈 Logistic Regression**
-        - Type: Linear Classification Model
-        - Algorithm: Logistic Regression with L2 regularization
-        - Features: TF-IDF vectors (unigrams + bigrams)
-        - Strengths: Fast prediction, interpretable, good baseline
-        """
     if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
-        info_text += """
-        **🎯 Multinomial Naive Bayes**
-        - Type: Probabilistic Classification Model
-        - Algorithm: Multinomial Naive Bayes
-        - Features: TF-IDF vectors (unigrams + bigrams)
-        - Strengths: Fast training, works with small datasets
-        """
-    # Feature engineering
-    info_text += """
-    🔤 **Feature Engineering:**
-    - Vectorization: TF-IDF (Term Frequency-Inverse Document Frequency)
-    - Max Features: 5,000 most important terms
-    - N-grams: Unigrams (1-word) and Bigrams (2-word phrases)
-    - Min Document Frequency: 2 (terms must appear in at least 2 documents)
-    - Stop Words: English stop words removed
-    """
-    # File status
-    info_text += "📁 **Model Files Status:**\n\n"
-    files_to_check = [
-        ("sentiment_analysis_pipeline.pkl", "Complete LR Pipeline", MODELS.get('pipeline_available', False)),
-        ("tfidf_vectorizer.pkl", "TF-IDF Vectorizer", MODELS.get('vectorizer_available', False)),
-        ("logistic_regression_model.pkl", "LR Classifier", MODELS.get('lr_available', False)),
-        ("multinomial_nb_model.pkl", "NB Classifier", MODELS.get('nb_available', False))
     ]
-    for filename, description, status in files_to_check:
         status_icon = "✅" if status else "❌"
-        info_text += f"- {filename}: {description} {status_icon}\n"
-    info_text += """
-    📚 **Training Information:**
-    - Dataset: Product Review Sentiment Analysis
-    - Classes: Positive and Negative sentiment
-    - Preprocessing: Text cleaning, tokenization, TF-IDF vectorization
-    - Training: Both models trained on same feature set for fair comparison
-    """
-    return info_text
 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================
-def create_interface():
-    """Create the main Gradio interface"""
-    # Custom CSS for better styling
-    css = """
-    .gradio-container {
-        font-family: 'Arial', sans-serif;
-    }
-    .main-header {
-        text-align: center;
-        color: #1f77b4;
-        font-size: 2.5rem;
-        margin-bottom: 1rem;
-    }
-    .tab-nav {
-        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-    }
-    """
-    with gr.Blocks(css=css, title="ML Text Classification App", theme=gr.themes.Soft()) as app:
         # Header
         gr.HTML("""
-        <div class="main-header">
-            <h1>🤖 ML Text Classification App</h1>
-            <p style="font-size: 1.2rem; color: #666;">
-                Advanced Sentiment Analysis with Multiple ML Models
-            </p>
         </div>
         """)
-        # Main tabbed interface
         with gr.Tabs():
-            # ============================================================================
-            # SINGLE PREDICTION TAB
-            # ============================================================================
             with gr.Tab("🔮 Single Prediction"):
-                gr.Markdown("### Enter text below and select a model to get sentiment predictions")
                 with gr.Row():
-                    with gr.Column(scale=2):
                         model_dropdown = gr.Dropdown(
                             choices=get_available_models(),
                             value=get_available_models()[0] if get_available_models() else None,
-                            label="Choose a model",
-                            info="Select the ML model for prediction"
                         )
                         text_input = gr.Textbox(
                             lines=5,
-                            placeholder="Type or paste your text here (e.g., product review, feedback, comment)...",
-                            label="Enter your text here",
-                            info="Enter any text you want to analyze for sentiment"
                         )
-                        # Example texts
                         with gr.Row():
-                            example_btn1 = gr.Button("Example 1", size="sm")
-                            example_btn2 = gr.Button("Example 2", size="sm")
-                            example_btn3 = gr.Button("Example 3", size="sm")
-                        predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary", size="lg")
-                    with gr.Column(scale=2):
-                        prediction_result = gr.Markdown(label="Prediction Result")
-                        confidence_result = gr.Markdown(label="Confidence")
-                        prob_details = gr.Markdown(label="Detailed Probabilities")
-                        interpretation = gr.Markdown(label="Interpretation")
-                with gr.Row():
-                    prob_plot = gr.Plot(label="Probability Visualization")
-                # Example text handlers
-                example_btn1.click(
-                    lambda: "This product is absolutely amazing! Best purchase I've made this year.",
                     outputs=text_input
                 )
-                example_btn2.click(
-                    lambda: "Terrible quality, broke after one day. Complete waste of money.",
                     outputs=text_input
                 )
-                example_btn3.click(
                     lambda: "It's okay, nothing special but does the job.",
                     outputs=text_input
                 )
                 # Prediction handler
                 predict_btn.click(
-                    predict_single_text,
                     inputs=[text_input, model_dropdown],
-                    outputs=[prediction_result, confidence_result, prob_details, interpretation, prob_plot]
                 )
-            # ============================================================================
-            # BATCH PROCESSING TAB
-            # ============================================================================
             with gr.Tab("📁 Batch Processing"):
-                gr.Markdown("### Upload a text file or CSV to process multiple texts at once")
                 with gr.Row():
                     with gr.Column():
                         file_upload = gr.File(
-                            label="Choose a file",
-                            file_types=[".txt", ".csv"],
-                            info="Upload a .txt file (one text per line) or .csv file (text in first column)"
                         )
-                        batch_model_dropdown = gr.Dropdown(
                             choices=get_available_models(),
                             value=get_available_models()[0] if get_available_models() else None,
-                            label="Choose model for batch processing"
                         )
-                        max_texts_slider = gr.Slider(
                             minimum=10,
-                            maximum=1000,
                             value=100,
                             step=10,
-                            label="Maximum texts to process",
-                            info="Limit processing for performance"
                         )
-                        process_btn = gr.Button("📊 Process File", variant="primary", size="lg")
                     with gr.Column():
-                        batch_results = gr.Markdown(label="Processing Results")
-                        download_file = gr.File(
-                            label="Download Results",
-                            visible=False
-                        )
-                # File format examples
-                with gr.Accordion("📄 Example File Formats", open=False):
-                    gr.Markdown("""
-                    **Text File (.txt):**
-                    ```
-                    This product is amazing!
-                    Terrible quality, very disappointed
-                    Great service and fast delivery
-                    ```
-                    **CSV File (.csv):**
-                    ```
-                    text,category
-                    "Amazing product, love it!",review
-                    "Poor quality, not satisfied",review
-                    ```
-                    """)
-                # Batch processing handler
-                def handle_batch_processing(file, model_choice, max_texts):
-                    summary, csv_data = process_batch_file(file, model_choice, max_texts)
-                    if csv_data:
-                        # Save CSV data to a temporary file for download
-                        csv_file = gr.File(value=io.StringIO(csv_data), visible=True)
-                        return summary, csv_file
-                    else:
-                        return summary, gr.File(visible=False)
                 process_btn.click(
-                    handle_batch_processing,
-                    inputs=[file_upload, batch_model_dropdown, max_texts_slider],
-                    outputs=[batch_results, download_file]
                 )
-            # ============================================================================
-            # MODEL COMPARISON TAB
-            # ============================================================================
             with gr.Tab("⚖️ Model Comparison"):
-                gr.Markdown("### Compare predictions from different models on the same text")
                 with gr.Row():
                     with gr.Column():
-                        comparison_text = gr.Textbox(
                             lines=4,
-                            placeholder="Enter text to see how different models perform...",
-                            label="Enter text to compare models",
-                            info="Try texts with mixed sentiment for interesting comparisons"
                         )
-                        compare_btn = gr.Button("🔍 Compare All Models", variant="primary", size="lg")
-                        # Quick examples for comparison
                         with gr.Row():
                             comp_ex1 = gr.Button("Mixed Example 1", size="sm")
                             comp_ex2 = gr.Button("Mixed Example 2", size="sm")
-                            comp_ex3 = gr.Button("Mixed Example 3", size="sm")
                     with gr.Column():
-                        comparison_results = gr.Markdown(label="Comparison Results")
-                with gr.Row():
-                    comparison_plot = gr.Plot(label="Model Comparison Visualization")
-                # Comparison example handlers
                 comp_ex1.click(
                     lambda: "This movie was okay but not great.",
-                    outputs=comparison_text
                 )
                 comp_ex2.click(
                     lambda: "The product is fine, I guess.",
-                    outputs=comparison_text
-                )
-                comp_ex3.click(
-                    lambda: "Could be better, could be worse.",
-                    outputs=comparison_text
                 )
-                # Comparison handler
                 compare_btn.click(
-                    compare_models,
-                    inputs=comparison_text,
-                    outputs=[comparison_results, comparison_plot]
                 )
-            # ============================================================================
-            # MODEL INFO TAB
-            # ============================================================================
             with gr.Tab("📊 Model Info"):
-                model_info_display = gr.Markdown(
                     value=get_model_info(),
                     label="Model Information"
                 )
-                refresh_info_btn = gr.Button("🔄 Refresh Info", size="sm")
-                refresh_info_btn.click(
-                    get_model_info,
-                    outputs=model_info_display
-                )
-            # ============================================================================
-            # HELP TAB
-            # ============================================================================
-            with gr.Tab("❓ Help"):
-                gr.Markdown("""
-                ## 📚 How to Use This App
-                ### 🔮 Single Prediction
-                1. **Select a model** from the dropdown (Logistic Regression or Multinomial Naive Bayes)
-                2. **Enter text** in the text area (product reviews, comments, feedback)
-                3. **Click 'Analyze Sentiment'** to get sentiment analysis results
-                4. **View results:** prediction, confidence score, and probability breakdown
-                5. **Try examples:** Use the provided example buttons to test the models
-                ### 📁 Batch Processing
-                1. **Prepare your file:**
-                   - **.txt file:** One text per line
-                   - **.csv file:** Text in the first column
-                2. **Upload the file** using the file uploader
-                3. **Select a model** for processing
-                4. **Adjust max texts** slider if needed
-                5. **Click 'Process File'** to analyze all texts
-                6. **Download results** as CSV file with predictions and probabilities
-                ### ⚖️ Model Comparison
-                1. **Enter text** you want to analyze
-                2. **Click 'Compare All Models'** to get predictions from both models
-                3. **View comparison results** showing predictions and confidence scores
-                4. **Analyze agreement:** See if models agree or disagree
-                5. **Compare visualizations:** Side-by-side probability charts
-                ### 🔧 Troubleshooting
-                **Models not loading:**
-                - Ensure model files (.pkl) are in the 'models/' directory
-                - Check that required files exist:
-                  - tfidf_vectorizer.pkl (required)
-                  - sentiment_analysis_pipeline.pkl (for LR pipeline)
-                  - logistic_regression_model.pkl (for LR individual)
-                  - multinomial_nb_model.pkl (for NB model)
-                **Prediction errors:**
-                - Make sure input text is not empty
-                - Try shorter texts if getting memory errors
-                - Check that text contains readable characters
-                **File upload issues:**
-                - Ensure file format is .txt or .csv
-                - Check file encoding (should be UTF-8)
-                - Verify CSV has text in the first column
-                ### 💻 Project Structure
-                ```
-                gradio_ml_app/
-                ├── app.py                              # Main application
-                ├── requirements.txt                    # Dependencies
-                ├── models/                            # Model files
-                │   ├── sentiment_analysis_pipeline.pkl # LR complete pipeline
-                │   ├── tfidf_vectorizer.pkl           # Feature extraction
-                │   ├── logistic_regression_model.pkl  # LR classifier
-                │   └── multinomial_nb_model.pkl       # NB classifier
-                └── sample_data/                       # Sample files
-                    ├── sample_texts.txt
-                    └── sample_data.csv
-                ```
-                """)
         # Footer
         gr.HTML("""
-        <div style='text-align: center; color: #666666; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee;'>
             <p><strong>🤖 ML Text Classification App</strong></p>
-            <p>Built with ❤️ using Gradio | Machine Learning Text Classification Demo | By Maaz Amjad</p>
-            <p><small>As a part of the courses series <strong>Introduction to Large Language Models/Intro to AI Agents</strong></small></p>
-            <p><small>This app demonstrates sentiment analysis using trained ML models</small></p>
         </div>
         """)
     return app
 # ============================================================================
-# MAIN EXECUTION
 # ============================================================================
 if __name__ == "__main__":
-    # Check model status on startup
     if MODELS is None:
         print("⚠️ Warning: No models loaded!")
-        print("Please ensure you have the required model files in the 'models/' directory.")
     else:
-        available_models = get_available_models()
-        print(f"✅ Successfully loaded {len(available_models)} model(s): {', '.join(available_models)}")
-    # Create and launch the interface
-    app = create_interface()
-    # Launch with custom settings
     app.launch(
-        server_name="0.0.0.0",  # Make accessible from any IP
-        server_port=7860,       # Default Gradio port
-        share=False,            # Set to True to create public link
-        debug=True,             # Enable debug mode
-        show_error=True,        # Show detailed errors
-        inbrowser=True          # Open browser automatically
     )

+# GRADIO ML CLASSIFICATION APP - SIMPLIFIED VERSION
+# =================================================
 import gradio as gr
 import pandas as pd
 import numpy as np
 import joblib
 import matplotlib.pyplot as plt
 import warnings
+import tempfile
+import os
+from typing import Tuple, List, Optional
 warnings.filterwarnings('ignore')
 # ============================================================================
+# MODEL LOADING
 # ============================================================================
 def load_models():
+    """Load the sentiment-analysis artifacts from the local ``models/`` directory.
+
+    Every artifact is loaded independently, so one missing or unreadable file
+    does not prevent the others from being used.
+
+    Returns:
+        A dict with each artifact that loaded plus one ``*_available`` flag
+        per artifact, or ``None`` when neither the complete pipeline nor a
+        vectorizer together with at least one classifier could be loaded.
+    """
+    # (dict key, availability flag, file path) for each artifact.
+    # NOTE: joblib.load unpickles the file, so these paths must only ever
+    # point at trusted model files.
+    artifacts = [
+        ('pipeline', 'pipeline_available', 'models/sentiment_analysis_pipeline.pkl'),
+        ('vectorizer', 'vectorizer_available', 'models/tfidf_vectorizer.pkl'),
+        ('logistic_regression', 'lr_available', 'models/logistic_regression_model.pkl'),
+        ('naive_bayes', 'nb_available', 'models/multinomial_nb_model.pkl'),
+    ]
     models = {}
     try:
+        for key, flag, path in artifacts:
+            try:
+                models[key] = joblib.load(path)
+                models[flag] = True
+            except Exception:
+                # Best effort: any load failure marks the artifact as
+                # unavailable. Unlike a bare ``except:``, this still lets
+                # KeyboardInterrupt and SystemExit propagate.
+                models[flag] = False
+        # Check if we have working models
         pipeline_ready = models['pipeline_available']
         individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
+        return models if (pipeline_ready or individual_ready) else None
     except Exception as e:
         print(f"Error loading models: {e}")
+        return None
 MODELS = load_models()
 # ============================================================================
+# CORE FUNCTIONS
 # ============================================================================
+def get_available_models():
+    """Return the display names of the models that can be used right now.
+
+    When nothing usable is loaded, the result is the single placeholder
+    entry ``"No models available"`` instead of an empty list.
+    """
+    placeholder = ["No models available"]
+    if MODELS is None:
+        return placeholder
+    names = []
+    # Logistic Regression is served either by the complete pipeline or by
+    # the vectorizer + standalone classifier pair.
+    lr_from_parts = MODELS.get('vectorizer_available') and MODELS.get('lr_available')
+    if MODELS.get('pipeline_available') or lr_from_parts:
+        names.append("Logistic Regression")
+    # Naive Bayes has no pipeline form; it always needs the vectorizer.
+    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
+        names.append("Multinomial Naive Bayes")
+    return names or placeholder
+def make_prediction(text, model_choice):
+    """Classify ``text`` with the model named by ``model_choice``.
+
+    Args:
+        text: The text to classify.
+        model_choice: "Logistic Regression" or "Multinomial Naive Bayes".
+
+    Returns:
+        A ``(label, probabilities, status)`` tuple. On success ``status`` is
+        "Success". Otherwise ``label`` and ``probabilities`` are ``None`` and
+        ``status`` describes what went wrong.
+    """
+    # ``not text`` also covers None, which has no ``.strip()``.
+    if MODELS is None or not text or not text.strip():
+        return None, None, "Please enter text and ensure models are loaded"
     try:
         if model_choice == "Logistic Regression":
             if MODELS.get('pipeline_available'):
                 prediction = MODELS['pipeline'].predict([text])[0]
                 probabilities = MODELS['pipeline'].predict_proba([text])[0]
             elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                 X = MODELS['vectorizer'].transform([text])
                 prediction = MODELS['logistic_regression'].predict(X)[0]
                 probabilities = MODELS['logistic_regression'].predict_proba(X)[0]
+            else:
+                return None, None, "Logistic Regression model not available"
         elif model_choice == "Multinomial Naive Bayes":
             if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                 X = MODELS['vectorizer'].transform([text])
                 prediction = MODELS['naive_bayes'].predict(X)[0]
                 probabilities = MODELS['naive_bayes'].predict_proba(X)[0]
+            else:
+                return None, None, "Naive Bayes model not available"
+        else:
+            # Any other choice (for example the "No models available"
+            # placeholder) would otherwise leave ``prediction`` unbound.
+            return None, None, f"Unknown model: {model_choice}"
+        # Convert prediction
+        class_names = ['Negative', 'Positive']
+        # Assuming predict() returns a NumPy array, element [0] is a NumPy
+        # scalar such as np.int64, which is not an ``int`` instance, so
+        # np.integer must be accepted for the label lookup to happen.
+        is_class_index = isinstance(prediction, (int, np.integer)) and 0 <= prediction < len(class_names)
+        prediction_label = class_names[int(prediction)] if is_class_index else str(prediction)
+        return prediction_label, probabilities, "Success"
     except Exception as e:
+        return None, None, f"Error: {str(e)}"
+def create_plot(probabilities):
+    """Draw a two-bar chart of the Negative/Positive probabilities.
+
+    Returns the matplotlib figure; each bar is annotated with its value
+    formatted as a percentage.
+    """
+    labels = ['Negative', 'Positive']
+    palette = ['#ff6b6b', '#51cf66']
+    fig, ax = plt.subplots(figsize=(8, 5))
+    rects = ax.bar(labels, probabilities, color=palette, alpha=0.8)
+    # Write each percentage just above the top of its bar
+    for rect, value in zip(rects, probabilities):
+        x_center = rect.get_x() + rect.get_width()/2.
+        y_text = rect.get_height() + 0.01
+        ax.text(x_center, y_text, f'{value:.1%}',
+                ha='center', va='bottom', fontweight='bold')
+    # Headroom above 1.0 keeps the label of a full-height bar visible
+    ax.set_ylim(0, 1.1)
+    ax.set_ylabel('Probability')
+    ax.set_title('Sentiment Prediction Probabilities')
+    ax.grid(axis='y', alpha=0.3)
+    plt.tight_layout()
+    return fig
 # INTERFACE FUNCTIONS
 # ============================================================================
+def predict_text(text, model_choice):
     """Single text prediction interface"""
     prediction, probabilities, status = make_prediction(text, model_choice)
         confidence = max(probabilities)
         # Format results
+        result = f"**Prediction:** {prediction} Sentiment\n"
+        result += f"**Confidence:** {confidence:.1%}\n\n"
+        result += f"**Detailed Probabilities:**\n"
+        result += f"- Negative: {probabilities[0]:.1%}\n"
+        result += f"- Positive: {probabilities[1]:.1%}\n\n"
+        # Interpretation
         if confidence >= 0.8:
+            result += "**High Confidence:** The model is very confident about this prediction."
         elif confidence >= 0.6:
+            result += "**Medium Confidence:** The model is reasonably confident."
         else:
+            result += "**Low Confidence:** The model is uncertain about this prediction."
         # Create plot
+        plot = create_plot(probabilities)
+        return result, plot
     else:
+        return f"Error: {status}", None
+def process_file(file, model_choice, max_texts):
+    """Process uploaded file"""
     if file is None:
+        return "Please upload a file!", None
     if MODELS is None:
+        return "No models loaded!", None
     try:
+        # Read file
         if file.name.endswith('.txt'):
+            with open(file.name, 'r', encoding='utf-8') as f:
+                content = f.read()
             texts = [line.strip() for line in content.split('\n') if line.strip()]
         elif file.name.endswith('.csv'):
+            df = pd.read_csv(file.name)
             texts = df.iloc[:, 0].astype(str).tolist()
         else:
+            return "Unsupported file format! Use .txt or .csv", None
         if not texts:
+            return "No text found in file!", None
+        # Limit texts
         if len(texts) > max_texts:
             texts = texts[:max_texts]
+        # Process texts
         results = []
         for i, text in enumerate(texts):
             if text.strip():
                 prediction, probabilities, _ = make_prediction(text, model_choice)
                     })
         if results:
+            # Create summary
             positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
             negative_count = len(results) - positive_count
             avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])
+            summary = f"**Processing Complete!**\n\n"
+            summary += f"**Summary Statistics:**\n"
+            summary += f"- Total Processed: {len(results)}\n"
+            summary += f"- Positive: {positive_count} ({positive_count/len(results):.1%})\n"
+            summary += f"- Negative: {negative_count} ({negative_count/len(results):.1%})\n"
+            summary += f"- Average Confidence: {avg_confidence:.1f}%\n"
+            # Create CSV for download
+            results_df = pd.DataFrame(results)
+            # Save to temporary file
+            with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
+                results_df.to_csv(f, index=False)
+                temp_file = f.name
+            return summary, temp_file
         else:
+            return "No valid texts could be processed!", None
     except Exception as e:
+        return f"Error processing file: {str(e)}", None
+def compare_models_func(text):
     """Compare predictions from different models"""
+    # Returns a (markdown_text, figure) pair. The figure is None whenever
+    # the text is an error message instead of a comparison.
     if MODELS is None:
+        return "No models loaded!", None
+    if not text.strip():
+        return "Please enter text to compare!", None
+    # get_available_models() returns a single placeholder entry when nothing
+    # is usable, so the length check below rejects that case as well.
     available_models = get_available_models()
     if len(available_models) < 2:
+        return "Need at least 2 models for comparison.", None
+    results = []
+    all_probs = []
+    # The status string from make_prediction is discarded: a model that
+    # fails is simply left out of the comparison.
     for model_name in available_models:
         prediction, probabilities, _ = make_prediction(text, model_name)
         if prediction and probabilities is not None:
+            results.append({
                 'Model': model_name,
                 'Prediction': prediction,
                 'Confidence': f"{max(probabilities):.1%}",
+                'Negative': f"{probabilities[0]:.1%}",
+                'Positive': f"{probabilities[1]:.1%}"
             })
+            all_probs.append(probabilities)
+    if results:
         # Create comparison text
+        comparison_text = "**Model Comparison Results:**\n\n"
+        for result in results:
             comparison_text += f"**{result['Model']}:**\n"
             comparison_text += f"- Prediction: {result['Prediction']}\n"
             comparison_text += f"- Confidence: {result['Confidence']}\n"
+            comparison_text += f"- Negative: {result['Negative']}, Positive: {result['Positive']}\n\n"
         # Agreement analysis
+        predictions = [r['Prediction'] for r in results]
         if len(set(predictions)) == 1:
+            comparison_text += f"**Agreement:** All models agree on {predictions[0]} sentiment!"
         else:
+            comparison_text += "**Disagreement:** Models have different predictions."
+        # Create comparison plot
+        # One subplot per model that produced a prediction, side by side.
+        fig, axes = plt.subplots(1, len(results), figsize=(6*len(results), 5))
+        # With a single subplot plt.subplots returns a bare Axes rather than
+        # an array, so wrap it to keep the indexing below uniform.
+        if len(results) == 1:
             axes = [axes]
+        for i, (result, probs) in enumerate(zip(results, all_probs)):
             ax = axes[i]
             classes = ['Negative', 'Positive']
             colors = ['#ff6b6b', '#51cf66']
+            bars = ax.bar(classes, probs, color=colors, alpha=0.8)
+            # Add labels
+            for bar, prob in zip(bars, probs):
                 height = bar.get_height()
                 ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
                        f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')
             ax.set_ylim(0, 1.1)
+            ax.set_title(f"{result['Model']}\n{result['Prediction']}")
             ax.grid(axis='y', alpha=0.3)
         plt.tight_layout()
         return comparison_text, fig
     else:
+        return "Failed to get predictions!", None
+def get_model_info():
+    """Build the Markdown text describing the loaded models and model files.
+
+    Returns setup instructions when nothing is loaded; otherwise a
+    description of each usable model followed by a per-file status list.
+    """
     if MODELS is None:
         return """
+        **No models loaded!**
+        Please ensure you have model files in the 'models/' directory:
         - sentiment_analysis_pipeline.pkl (complete pipeline), OR
         - tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR
         - tfidf_vectorizer.pkl + multinomial_nb_model.pkl
         """
+    has_vectorizer = MODELS.get('vectorizer_available')
+    lr_usable = MODELS.get('pipeline_available') or (has_vectorizer and MODELS.get('lr_available'))
+    nb_usable = has_vectorizer and MODELS.get('nb_available')
+    # Collect the fragments and join them once at the end
+    parts = [
+        "**Models loaded successfully!**\n\n",
+        "**Available Models:**\n\n",
+    ]
+    if lr_usable:
+        parts.extend([
+            "**Logistic Regression**\n",
+            "- Type: Linear Classification\n",
+            "- Features: TF-IDF vectors\n",
+            "- Strengths: Fast, interpretable\n\n",
+        ])
+    if nb_usable:
+        parts.extend([
+            "**Multinomial Naive Bayes**\n",
+            "- Type: Probabilistic Classification\n",
+            "- Features: TF-IDF vectors\n",
+            "- Strengths: Works well with small data\n\n",
+        ])
+    parts.append("**File Status:**\n")
+    # (file name, availability flag in MODELS), in display order
+    file_flags = [
+        ("sentiment_analysis_pipeline.pkl", 'pipeline_available'),
+        ("tfidf_vectorizer.pkl", 'vectorizer_available'),
+        ("logistic_regression_model.pkl", 'lr_available'),
+        ("multinomial_nb_model.pkl", 'nb_available'),
+    ]
+    for filename, flag in file_flags:
+        status_icon = "✅" if MODELS.get(flag, False) else "❌"
+        parts.append(f"- {filename}: {status_icon}\n")
+    return "".join(parts)
 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================
+def create_app():
+    """Create Gradio interface"""
+    # Builds a four-tab Blocks layout (single prediction, batch processing,
+    # model comparison, model info), wires the button handlers, and returns
+    # the Blocks object without launching it.
+    with gr.Blocks(title="ML Text Classification") as app:
         # Header
         gr.HTML("""
+        <div style="text-align: center; margin-bottom: 2rem;">
+            <h1 style="color: #1f77b4; font-size: 2.5rem;">🤖 ML Text Classification App</h1>
+            <p style="font-size: 1.2rem; color: #666;">Advanced Sentiment Analysis with Multiple ML Models</p>
         </div>
         """)
+        # Main interface with tabs
         with gr.Tabs():
+            # Single Prediction Tab
             with gr.Tab("🔮 Single Prediction"):
+                gr.Markdown("### Enter text and select a model for sentiment analysis")
                 with gr.Row():
+                    with gr.Column(scale=1):
+                        # The model list is computed here, while the UI is
+                        # being built; the first entry is preselected.
                         model_dropdown = gr.Dropdown(
                             choices=get_available_models(),
                             value=get_available_models()[0] if get_available_models() else None,
+                            label="Choose Model"
                         )
                         text_input = gr.Textbox(
                             lines=5,
+                            placeholder="Enter your text here...",
+                            label="Text Input"
                         )
                         with gr.Row():
+                            example1_btn = gr.Button("Good Example", size="sm")
+                            example2_btn = gr.Button("Bad Example", size="sm")
+                            example3_btn = gr.Button("Neutral Example", size="sm")
+                        predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
+                    with gr.Column(scale=1):
+                        prediction_output = gr.Markdown(label="Results")
+                        prediction_plot = gr.Plot(label="Probability Chart")
+                # Example handlers
+                # Each example button only fills the text box with a canned
+                # sentence; it does not run a prediction.
+                example1_btn.click(
+                    lambda: "This product is absolutely amazing! Best purchase ever!",
                     outputs=text_input
                 )
+                example2_btn.click(
+                    lambda: "Terrible quality, broke immediately. Waste of money!",
                     outputs=text_input
                 )
+                example3_btn.click(
                     lambda: "It's okay, nothing special but does the job.",
                     outputs=text_input
                 )
                 # Prediction handler
                 predict_btn.click(
+                    predict_text,
                     inputs=[text_input, model_dropdown],
+                    outputs=[prediction_output, prediction_plot]
                 )
+            # Batch Processing Tab
             with gr.Tab("📁 Batch Processing"):
+                gr.Markdown("### Upload a file to process multiple texts")
                 with gr.Row():
                     with gr.Column():
                         file_upload = gr.File(
+                            label="Upload File (.txt or .csv)",
+                            file_types=[".txt", ".csv"]
                         )
+                        batch_model = gr.Dropdown(
                             choices=get_available_models(),
                             value=get_available_models()[0] if get_available_models() else None,
+                            label="Model for Batch Processing"
                         )
+                        max_texts = gr.Slider(
                             minimum=10,
+                            maximum=500,
                             value=100,
                             step=10,
+                            label="Max Texts to Process"
                         )
+                        process_btn = gr.Button("📊 Process File", variant="primary")
                     with gr.Column():
+                        batch_output = gr.Markdown(label="Processing Results")
+                        download_file = gr.File(label="Download Results")
+                # Process handler
                 process_btn.click(
+                    process_file,
+                    inputs=[file_upload, batch_model, max_texts],
+                    outputs=[batch_output, download_file]
                 )
+            # Model Comparison Tab
             with gr.Tab("⚖️ Model Comparison"):
+                gr.Markdown("### Compare predictions from different models")
                 with gr.Row():
                     with gr.Column():
+                        comparison_input = gr.Textbox(
                             lines=4,
+                            placeholder="Enter text to compare models...",
+                            label="Text for Comparison"
                         )
+                        compare_btn = gr.Button("🔍 Compare Models", variant="primary")
                         with gr.Row():
                             comp_ex1 = gr.Button("Mixed Example 1", size="sm")
                             comp_ex2 = gr.Button("Mixed Example 2", size="sm")
                     with gr.Column():
+                        comparison_output = gr.Markdown(label="Comparison Results")
+                # Created outside the Row above, so the plot is not part of
+                # the two-column layout.
+                comparison_plot = gr.Plot(label="Model Comparison")
+                # Example handlers
                 comp_ex1.click(
                     lambda: "This movie was okay but not great.",
+                    outputs=comparison_input
                 )
                 comp_ex2.click(
                     lambda: "The product is fine, I guess.",
+                    outputs=comparison_input
                 )
+                # Compare handler
                 compare_btn.click(
+                    compare_models_func,
+                    inputs=comparison_input,
+                    outputs=[comparison_output, comparison_plot]
                 )
+            # Model Info Tab
             with gr.Tab("📊 Model Info"):
+                model_info = gr.Markdown(
                     value=get_model_info(),
                     label="Model Information"
                 )
+                refresh_btn = gr.Button("🔄 Refresh", size="sm")
+                # Refresh re-renders get_model_info() from the current
+                # MODELS value; it does not reload any model file.
+                refresh_btn.click(get_model_info, outputs=model_info)
         # Footer
         gr.HTML("""
+        <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee; color: #666;">
             <p><strong>🤖 ML Text Classification App</strong></p>
+            <p>Built with Gradio | By Maaz Amjad</p>
+            <p><small>Part of Introduction to Large Language Models course</small></p>
         </div>
         """)
     return app
 # ============================================================================
+# MAIN
 # ============================================================================
 if __name__ == "__main__":
+    # Report which models are usable before the server starts
+    if MODELS is not None:
+        loaded_names = get_available_models()
+        print(f"✅ Successfully loaded {len(loaded_names)} model(s): {', '.join(loaded_names)}")
+    else:
+        print("⚠️ Warning: No models loaded!")
+    # Build the interface, then serve it on all interfaces at port 7860
+    launch_options = {
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "share": False,
+        "debug": True,
+    }
+    app = create_app()
+    app.launch(**launch_options)