maazamjad committed on
Commit
2bef3bb
·
verified ·
1 Parent(s): b4b21b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +631 -608
app.py CHANGED
@@ -1,52 +1,24 @@
1
- # STREAMLIT ML CLASSIFICATION APP - DUAL MODEL SUPPORT
2
  # =====================================================
3
 
4
- import streamlit as st
5
  import pandas as pd
6
  import numpy as np
7
  import joblib
8
  import matplotlib.pyplot as plt
9
  import seaborn as sns
10
-
11
- # Page Configuration
12
- st.set_page_config(
13
- page_title="ML Text Classifier",
14
- page_icon="🤖",
15
- layout="wide",
16
- initial_sidebar_state="expanded"
17
- )
18
-
19
- # Custom CSS
20
- st.markdown("""
21
- <style>
22
- .main-header {
23
- font-size: 2.5rem;
24
- color: #1f77b4;
25
- text-align: center;
26
- margin-bottom: 2rem;
27
- }
28
- .success-box {
29
- padding: 1rem;
30
- border-radius: 0.5rem;
31
- background-color: #d4edda;
32
- border: 1px solid #c3e6cb;
33
- margin: 1rem 0;
34
- }
35
- .metric-card {
36
- background-color: #f8f9fa;
37
- padding: 1rem;
38
- border-radius: 0.5rem;
39
- border-left: 4px solid #007bff;
40
- }
41
- </style>
42
- """, unsafe_allow_html=True)
43
 
44
  # ============================================================================
45
  # MODEL LOADING SECTION
46
  # ============================================================================
47
 
48
- @st.cache_resource
49
  def load_models():
 
50
  models = {}
51
 
52
  try:
@@ -83,379 +55,514 @@ def load_models():
83
  individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
84
 
85
  if not (pipeline_ready or individual_ready):
86
- st.error("No complete model setup found!")
87
  return None
88
 
89
  return models
90
 
91
  except Exception as e:
92
- st.error(f"Error loading models: {e}")
93
  return None
94
 
 
 
 
95
  # ============================================================================
96
- # PREDICTION FUNCTION
97
  # ============================================================================
98
 
99
- def make_prediction(text, model_choice, models):
100
  """Make prediction using the selected model"""
101
- if models is None:
102
- return None, None
 
 
 
103
 
104
  try:
105
  prediction = None
106
  probabilities = None
107
 
108
- if model_choice == "pipeline" and models.get('pipeline_available'):
109
- # Use the complete pipeline (Logistic Regression)
110
- prediction = models['pipeline'].predict([text])[0]
111
- probabilities = models['pipeline'].predict_proba([text])[0]
112
-
113
- elif model_choice == "logistic_regression":
114
- if models.get('pipeline_available'):
115
- # Use pipeline for LR
116
- prediction = models['pipeline'].predict([text])[0]
117
- probabilities = models['pipeline'].predict_proba([text])[0]
118
- elif models.get('vectorizer_available') and models.get('lr_available'):
119
  # Use individual components
120
- X = models['vectorizer'].transform([text])
121
- prediction = models['logistic_regression'].predict(X)[0]
122
- probabilities = models['logistic_regression'].predict_proba(X)[0]
123
 
124
- elif model_choice == "naive_bayes":
125
- if models.get('vectorizer_available') and models.get('nb_available'):
126
  # Use individual components for NB
127
- X = models['vectorizer'].transform([text])
128
- prediction = models['naive_bayes'].predict(X)[0]
129
- probabilities = models['naive_bayes'].predict_proba(X)[0]
130
 
131
  if prediction is not None and probabilities is not None:
132
  # Convert to readable format
133
  class_names = ['Negative', 'Positive']
134
  prediction_label = class_names[prediction]
135
- return prediction_label, probabilities
 
136
  else:
137
- return None, None
138
 
139
  except Exception as e:
140
- st.error(f"Error making prediction: {e}")
141
- st.error(f"Model choice: {model_choice}")
142
- st.error(f"Available models: {[k for k, v in models.items() if isinstance(v, bool) and v]}")
143
- return None, None
144
 
145
- def get_available_models(models):
146
  """Get list of available models for selection"""
 
 
 
147
  available = []
148
 
149
- if models is None:
150
- return available
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- if models.get('pipeline_available'):
153
- available.append(("logistic_regression", "📈 Logistic Regression (Pipeline)"))
154
- elif models.get('vectorizer_available') and models.get('lr_available'):
155
- available.append(("logistic_regression", "📈 Logistic Regression (Individual)"))
 
156
 
157
- if models.get('vectorizer_available') and models.get('nb_available'):
158
- available.append(("naive_bayes", "🎯 Multinomial Naive Bayes"))
 
 
159
 
160
- return available
 
 
 
 
 
 
161
 
162
  # ============================================================================
163
- # SIDEBAR NAVIGATION
164
  # ============================================================================
165
 
166
- st.sidebar.title("🧭 Navigation")
167
- st.sidebar.markdown("Choose what you want to do:")
168
-
169
- page = st.sidebar.selectbox(
170
- "Select Page:",
171
- ["🏠 Home", "🔮 Single Prediction", "📁 Batch Processing", "⚖️ Model Comparison", "📊 Model Info", "❓ Help"]
172
- )
173
-
174
- # Load models
175
- models = load_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
- # ============================================================================
178
- # HOME PAGE
179
- # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
- if page == "🏠 Home":
182
- st.markdown('<h1 class="main-header">🤖 ML Text Classification App</h1>', unsafe_allow_html=True)
183
-
184
- st.markdown("""
185
- Welcome to your machine learning web application! This app demonstrates sentiment analysis
186
- using multiple trained models: **Logistic Regression** and **Multinomial Naive Bayes**.
187
- """)
188
-
189
- # App overview
190
- col1, col2, col3 = st.columns(3)
191
-
192
- with col1:
193
- st.markdown("""
194
- ### 🔮 Single Prediction
195
- - Enter text manually
196
- - Choose between models
197
- - Get instant predictions
198
- - See confidence scores
199
- """)
200
 
201
- with col2:
202
- st.markdown("""
203
- ### 📁 Batch Processing
204
- - Upload text files
205
- - Process multiple texts
206
- - Compare model performance
207
- - Download results
208
- """)
209
 
210
- with col3:
211
- st.markdown("""
212
- ### ⚖️ Model Comparison
213
- - Compare different models
214
- - Side-by-side results
215
- - Agreement analysis
216
- - Performance metrics
217
- """)
218
 
219
- # Model status
220
- st.subheader("📋 Model Status")
221
- if models:
222
- st.success("✅ Models loaded successfully!")
223
-
224
- col1, col2, col3 = st.columns(3)
225
-
226
- with col1:
227
- if models.get('pipeline_available'):
228
- st.info("**📈 Logistic Regression**\n✅ Pipeline Available")
229
- elif models.get('lr_available') and models.get('vectorizer_available'):
230
- st.info("**📈 Logistic Regression**\n✅ Individual Components")
231
- else:
232
- st.warning("**📈 Logistic Regression**\n❌ Not Available")
233
-
234
- with col2:
235
- if models.get('nb_available') and models.get('vectorizer_available'):
236
- st.info("**🎯 Multinomial NB**\n✅ Available")
237
- else:
238
- st.warning("**🎯 Multinomial NB**\n❌ Not Available")
239
-
240
- with col3:
241
- if models.get('vectorizer_available'):
242
- st.info("**🔤 TF-IDF Vectorizer**\n✅ Available")
243
- else:
244
- st.warning("**🔤 TF-IDF Vectorizer**\n❌ Not Available")
 
 
 
 
 
245
 
246
- else:
247
- st.error("❌ Models not loaded. Please check model files.")
248
-
249
- # ============================================================================
250
- # SINGLE PREDICTION PAGE
251
- # ============================================================================
252
-
253
- elif page == "🔮 Single Prediction":
254
- st.header("🔮 Make a Single Prediction")
255
- st.markdown("Enter text below and select a model to get sentiment predictions.")
256
-
257
- if models:
258
- available_models = get_available_models(models)
259
-
260
- if available_models:
261
- # Model selection
262
- model_choice = st.selectbox(
263
- "Choose a model:",
264
- options=[model[0] for model in available_models],
265
- format_func=lambda x: next(model[1] for model in available_models if model[0] == x)
266
- )
267
 
268
- # Text input
269
- user_input = st.text_area(
270
- "Enter your text here:",
271
- placeholder="Type or paste your text here (e.g., product review, feedback, comment)...",
272
- height=150
273
- )
274
 
275
- # Character count
276
- if user_input:
277
- st.caption(f"Character count: {len(user_input)} | Word count: {len(user_input.split())}")
278
 
279
- # Example texts
280
- with st.expander("📝 Try these example texts"):
281
- examples = [
282
- "This product is absolutely amazing! Best purchase I've made this year.",
283
- "Terrible quality, broke after one day. Complete waste of money.",
284
- "It's okay, nothing special but does the job.",
285
- "Outstanding customer service and fast delivery. Highly recommend!",
286
- "I love this movie! It's absolutely fantastic and entertaining."
287
- ]
288
-
289
- col1, col2 = st.columns(2)
290
- for i, example in enumerate(examples):
291
- with col1 if i % 2 == 0 else col2:
292
- if st.button(f"Example {i+1}", key=f"example_{i}"):
293
- st.session_state.user_input = example
294
- st.rerun()
295
 
296
- # Use session state for user input
297
- if 'user_input' in st.session_state:
298
- user_input = st.session_state.user_input
299
 
300
- # Prediction button
301
- if st.button("🚀 Predict", type="primary"):
302
- if user_input.strip():
303
- with st.spinner('Analyzing sentiment...'):
304
- prediction, probabilities = make_prediction(user_input, model_choice, models)
305
-
306
- if prediction and probabilities is not None:
307
- # Display prediction
308
- col1, col2 = st.columns([3, 1])
309
-
310
- with col1:
311
- if prediction == "Positive":
312
- st.success(f"🎯 Prediction: **{prediction} Sentiment**")
313
- else:
314
- st.error(f"🎯 Prediction: **{prediction} Sentiment**")
315
-
316
- with col2:
317
- confidence = max(probabilities)
318
- st.metric("Confidence", f"{confidence:.1%}")
319
-
320
- # Create probability chart
321
- st.subheader("📊 Prediction Probabilities")
322
-
323
- # Detailed probabilities
324
- col1, col2 = st.columns(2)
325
- with col1:
326
- st.metric("😞 Negative", f"{probabilities[0]:.1%}")
327
- with col2:
328
- st.metric("😊 Positive", f"{probabilities[1]:.1%}")
329
-
330
- # Bar chart
331
- class_names = ['Negative', 'Positive']
332
- prob_df = pd.DataFrame({
333
- 'Sentiment': class_names,
334
- 'Probability': probabilities
335
- })
336
- st.bar_chart(prob_df.set_index('Sentiment'), height=300)
337
-
338
- else:
339
- st.error("Failed to make prediction")
340
- else:
341
- st.warning("Please enter some text to classify!")
342
- else:
343
- st.error("No models available for prediction.")
344
  else:
345
- st.warning("Models not loaded. Please check the model files.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
  # ============================================================================
348
- # BATCH PROCESSING PAGE
349
  # ============================================================================
350
 
351
- elif page == "📁 Batch Processing":
352
- st.header("📁 Upload File for Batch Processing")
353
- st.markdown("Upload a text file or CSV to process multiple texts at once.")
354
-
355
- if models:
356
- available_models = get_available_models(models)
357
-
358
- if available_models:
359
- # File upload
360
- uploaded_file = st.file_uploader(
361
- "Choose a file",
362
- type=['txt', 'csv'],
363
- help="Upload a .txt file (one text per line) or .csv file (text in first column)"
364
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
- if uploaded_file:
367
- # Model selection
368
- model_choice = st.selectbox(
369
- "Choose model for batch processing:",
370
- options=[model[0] for model in available_models],
371
- format_func=lambda x: next(model[1] for model in available_models if model[0] == x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  )
373
 
374
- # Process file
375
- if st.button("📊 Process File"):
376
- try:
377
- # Read file content
378
- if uploaded_file.type == "text/plain":
379
- content = str(uploaded_file.read(), "utf-8")
380
- texts = [line.strip() for line in content.split('\n') if line.strip()]
381
- else: # CSV
382
- df = pd.read_csv(uploaded_file)
383
- texts = df.iloc[:, 0].astype(str).tolist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
- if not texts:
386
- st.error("No text found in file")
387
- else:
388
- st.info(f"Processing {len(texts)} texts...")
389
-
390
- # Process all texts
391
- results = []
392
- progress_bar = st.progress(0)
393
-
394
- for i, text in enumerate(texts):
395
- if text.strip():
396
- prediction, probabilities = make_prediction(text, model_choice, models)
397
-
398
- if prediction and probabilities is not None:
399
- results.append({
400
- 'Text': text[:100] + "..." if len(text) > 100 else text,
401
- 'Full_Text': text,
402
- 'Prediction': prediction,
403
- 'Confidence': f"{max(probabilities):.1%}",
404
- 'Negative_Prob': f"{probabilities[0]:.1%}",
405
- 'Positive_Prob': f"{probabilities[1]:.1%}"
406
- })
407
-
408
- progress_bar.progress((i + 1) / len(texts))
409
-
410
- if results:
411
- # Display results
412
- st.success(f"✅ Processed {len(results)} texts successfully!")
413
-
414
- results_df = pd.DataFrame(results)
415
-
416
- # Summary statistics
417
- st.subheader("📊 Summary Statistics")
418
- col1, col2, col3, col4 = st.columns(4)
419
-
420
- positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
421
- negative_count = len(results) - positive_count
422
- avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])
423
-
424
- with col1:
425
- st.metric("Total Processed", len(results))
426
- with col2:
427
- st.metric("😊 Positive", positive_count)
428
- with col3:
429
- st.metric("😞 Negative", negative_count)
430
- with col4:
431
- st.metric("Avg Confidence", f"{avg_confidence:.1f}%")
432
-
433
- # Results preview
434
- st.subheader("📋 Results Preview")
435
- st.dataframe(
436
- results_df[['Text', 'Prediction', 'Confidence']],
437
- use_container_width=True
438
- )
439
-
440
- # Download option
441
- csv = results_df.to_csv(index=False)
442
- st.download_button(
443
- label="📥 Download Full Results",
444
- data=csv,
445
- file_name=f"predictions_{model_choice}_{uploaded_file.name}.csv",
446
- mime="text/csv"
447
- )
448
- else:
449
- st.error("No valid texts could be processed")
450
-
451
- except Exception as e:
452
- st.error(f"Error processing file: {e}")
453
- else:
454
- st.info("Please upload a file to get started.")
455
 
456
- # Show example file formats
457
- with st.expander("📄 Example File Formats"):
458
- st.markdown("""
459
  **Text File (.txt):**
460
  ```
461
  This product is amazing!
@@ -470,274 +577,190 @@ elif page == "📁 Batch Processing":
470
  "Poor quality, not satisfied",review
471
  ```
472
  """)
473
- else:
474
- st.error("No models available for batch processing.")
475
- else:
476
- st.warning("Models not loaded. Please check the model files.")
477
-
478
- # ============================================================================
479
- # MODEL COMPARISON PAGE
480
- # ============================================================================
481
-
482
- elif page == "⚖️ Model Comparison":
483
- st.header("⚖️ Compare Models")
484
- st.markdown("Compare predictions from different models on the same text.")
485
-
486
- if models:
487
- available_models = get_available_models(models)
488
-
489
- if len(available_models) >= 2:
490
- # Text input for comparison
491
- comparison_text = st.text_area(
492
- "Enter text to compare models:",
493
- placeholder="Enter text to see how different models perform...",
494
- height=100
495
- )
496
-
497
- if st.button("📊 Compare All Models") and comparison_text.strip():
498
- st.subheader("🔍 Model Comparison Results")
499
-
500
- # Get predictions from all available models
501
- comparison_results = []
502
-
503
- for model_key, model_name in available_models:
504
- prediction, probabilities = make_prediction(comparison_text, model_key, models)
505
-
506
- if prediction and probabilities is not None:
507
- comparison_results.append({
508
- 'Model': model_name,
509
- 'Prediction': prediction,
510
- 'Confidence': f"{max(probabilities):.1%}",
511
- 'Negative %': f"{probabilities[0]:.1%}",
512
- 'Positive %': f"{probabilities[1]:.1%}",
513
- 'Raw_Probs': probabilities
514
- })
515
 
516
- if comparison_results:
517
- # Comparison table
518
- comparison_df = pd.DataFrame(comparison_results)
519
- st.table(comparison_df[['Model', 'Prediction', 'Confidence', 'Negative %', 'Positive %']])
520
 
521
- # Agreement analysis
522
- predictions = [r['Prediction'] for r in comparison_results]
523
- if len(set(predictions)) == 1:
524
- st.success(f"✅ All models agree: **{predictions[0]} Sentiment**")
525
  else:
526
- st.warning("⚠️ Models disagree on prediction")
527
- for result in comparison_results:
528
- model_name = result['Model'].split(' ')[1] if ' ' in result['Model'] else result['Model']
529
- st.write(f"- {model_name}: {result['Prediction']}")
530
-
531
- # Side-by-side probability charts
532
- st.subheader("📊 Detailed Probability Comparison")
533
-
534
- cols = st.columns(len(comparison_results))
535
-
536
- for i, result in enumerate(comparison_results):
537
- with cols[i]:
538
- model_name = result['Model']
539
- st.write(f"**{model_name}**")
540
-
541
- chart_data = pd.DataFrame({
542
- 'Sentiment': ['Negative', 'Positive'],
543
- 'Probability': result['Raw_Probs']
544
- })
545
- st.bar_chart(chart_data.set_index('Sentiment'))
546
-
547
- else:
548
- st.error("Failed to get predictions from models")
549
-
550
- elif len(available_models) == 1:
551
- st.info("Only one model available. Use Single Prediction page for detailed analysis.")
552
-
553
- else:
554
- st.error("No models available for comparison.")
555
- else:
556
- st.warning("Models not loaded. Please check the model files.")
557
-
558
- # ============================================================================
559
- # MODEL INFO PAGE
560
- # ============================================================================
561
-
562
- elif page == "📊 Model Info":
563
- st.header("📊 Model Information")
564
-
565
- if models:
566
- st.success("✅ Models are loaded and ready!")
567
-
568
- # Model details
569
- st.subheader("🔧 Available Models")
570
-
571
- col1, col2 = st.columns(2)
572
-
573
- with col1:
574
- st.markdown("""
575
- ### 📈 Logistic Regression
576
- **Type:** Linear Classification Model
577
- **Algorithm:** Logistic Regression with L2 regularization
578
- **Features:** TF-IDF vectors (unigrams + bigrams)
579
 
580
- **Strengths:**
581
- - Fast prediction
582
- - Interpretable coefficients
583
- - Good baseline performance
584
- - Handles sparse features well
585
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
 
587
- with col2:
588
- st.markdown("""
589
- ### 🎯 Multinomial Naive Bayes
590
- **Type:** Probabilistic Classification Model
591
- **Algorithm:** Multinomial Naive Bayes
592
- **Features:** TF-IDF vectors (unigrams + bigrams)
 
 
 
 
 
 
 
 
593
 
594
- **Strengths:**
595
- - Fast training and prediction
596
- - Works well with small datasets
597
- - Good performance on text classification
598
- - Natural probabilistic outputs
599
- """)
600
-
601
- # Feature engineering info
602
- st.subheader("🔤 Feature Engineering")
603
- st.markdown("""
604
- **Vectorization:** TF-IDF (Term Frequency-Inverse Document Frequency)
605
- - **Max Features:** 5,000 most important terms
606
- - **N-grams:** Unigrams (1-word) and Bigrams (2-word phrases)
607
- - **Min Document Frequency:** 2 (terms must appear in at least 2 documents)
608
- - **Stop Words:** English stop words removed
609
- """)
610
-
611
- # File status
612
- st.subheader("📁 Model Files Status")
613
- file_status = []
614
-
615
- files_to_check = [
616
- ("sentiment_analysis_pipeline.pkl", "Complete LR Pipeline", models.get('pipeline_available', False)),
617
- ("tfidf_vectorizer.pkl", "TF-IDF Vectorizer", models.get('vectorizer_available', False)),
618
- ("logistic_regression_model.pkl", "LR Classifier", models.get('lr_available', False)),
619
- ("multinomial_nb_model.pkl", "NB Classifier", models.get('nb_available', False))
620
- ]
621
-
622
- for filename, description, status in files_to_check:
623
- file_status.append({
624
- "File": filename,
625
- "Description": description,
626
- "Status": "✅ Loaded" if status else "❌ Not Found"
627
- })
628
-
629
- st.table(pd.DataFrame(file_status))
630
-
631
- # Training information
632
- st.subheader("📚 Training Information")
633
- st.markdown("""
634
- **Dataset:** Product Review Sentiment Analysis
635
- - **Classes:** Positive and Negative sentiment
636
- - **Preprocessing:** Text cleaning, tokenization, TF-IDF vectorization
637
- - **Training:** Both models trained on same feature set for fair comparison
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
638
  """)
639
-
640
- else:
641
- st.warning("Models not loaded. Please check model files in the 'models/' directory.")
642
 
643
  # ============================================================================
644
- # HELP PAGE
645
  # ============================================================================
646
 
647
- elif page == "❓ Help":
648
- st.header("❓ How to Use This App")
649
-
650
- with st.expander("🔮 Single Prediction"):
651
- st.write("""
652
- 1. **Select a model** from the dropdown (Logistic Regression or Multinomial Naive Bayes)
653
- 2. **Enter text** in the text area (product reviews, comments, feedback)
654
- 3. **Click 'Predict'** to get sentiment analysis results
655
- 4. **View results:** prediction, confidence score, and probability breakdown
656
- 5. **Try examples:** Use the provided example texts to test the models
657
- """)
658
-
659
- with st.expander("📁 Batch Processing"):
660
- st.write("""
661
- 1. **Prepare your file:**
662
- - **.txt file:** One text per line
663
- - **.csv file:** Text in the first column
664
- 2. **Upload the file** using the file uploader
665
- 3. **Select a model** for processing
666
- 4. **Click 'Process File'** to analyze all texts
667
- 5. **Download results** as CSV file with predictions and probabilities
668
- """)
669
-
670
- with st.expander("⚖️ Model Comparison"):
671
- st.write("""
672
- 1. **Enter text** you want to analyze
673
- 2. **Click 'Compare All Models'** to get predictions from both models
674
- 3. **View comparison table** showing predictions and confidence scores
675
- 4. **Analyze agreement:** See if models agree or disagree
676
- 5. **Compare probabilities:** Side-by-side probability charts
677
- """)
678
 
679
- with st.expander("🔧 Troubleshooting"):
680
- st.write("""
681
- **Common Issues and Solutions:**
682
-
683
- **Models not loading:**
684
- - Ensure model files (.pkl) are in the 'models/' directory
685
- - Check that required files exist:
686
- - tfidf_vectorizer.pkl (required)
687
- - sentiment_analysis_pipeline.pkl (for LR pipeline)
688
- - logistic_regression_model.pkl (for LR individual)
689
- - multinomial_nb_model.pkl (for NB model)
690
-
691
- **Prediction errors:**
692
- - Make sure input text is not empty
693
- - Try shorter texts if getting memory errors
694
- - Check that text contains readable characters
695
-
696
- **File upload issues:**
697
- - Ensure file format is .txt or .csv
698
- - Check file encoding (should be UTF-8)
699
- - Verify CSV has text in the first column
700
- """)
701
 
702
- # System information
703
- st.subheader("💻 Your Project Structure")
704
- st.code("""
705
- streamlit_ml_app/
706
- ├── app.py # Main application
707
- ├── requirements.txt # Dependencies
708
- ├── models/ # Model files
709
- │ ├── sentiment_analysis_pipeline.pkl # LR complete pipeline
710
- │ ├── tfidf_vectorizer.pkl # Feature extraction
711
- │ ├── logistic_regression_model.pkl # LR classifier
712
- │ └── multinomial_nb_model.pkl # NB classifier
713
- └── sample_data/ # Sample files
714
- ├── sample_texts.txt
715
- └── sample_data.csv
716
- """)
717
-
718
- # ============================================================================
719
- # FOOTER
720
- # ============================================================================
721
-
722
- st.sidebar.markdown("---")
723
- st.sidebar.markdown("### 📚 App Information")
724
- st.sidebar.info("""
725
- **ML Text Classification App**
726
- Built with Streamlit
727
-
728
- **Models:**
729
- - 📈 Logistic Regression
730
- - 🎯 Multinomial Naive Bayes
731
-
732
- **Framework:** scikit-learn
733
- **Deployment:** Streamlit Cloud Ready
734
- """)
735
-
736
- st.markdown("---")
737
- st.markdown("""
738
- <div style='text-align: center; color: #666666;'>
739
- Built with ❤️ using Streamlit | Machine Learning Text Classification Demo | By Maaz Amjad<br>
740
- <small>As a part of the courses series **Introduction to Large Language Models/Intro to AI Agents**</small><br>
741
- <small>This app demonstrates sentiment analysis using trained ML models</small>
742
- </div>
743
- """, unsafe_allow_html=True)
 
1
+ # GRADIO ML CLASSIFICATION APP - DUAL MODEL SUPPORT
2
  # =====================================================
3
 
4
+ import gradio as gr
5
  import pandas as pd
6
  import numpy as np
7
  import joblib
8
  import matplotlib.pyplot as plt
9
  import seaborn as sns
10
+ import io
11
+ import base64
12
+ from typing import Tuple, List, Optional
13
+ import warnings
14
+ warnings.filterwarnings('ignore')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # ============================================================================
17
  # MODEL LOADING SECTION
18
  # ============================================================================
19
 
 
20
  def load_models():
21
+ """Load all available ML models"""
22
  models = {}
23
 
24
  try:
 
55
  individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
56
 
57
  if not (pipeline_ready or individual_ready):
 
58
  return None
59
 
60
  return models
61
 
62
  except Exception as e:
63
+ print(f"Error loading models: {e}")
64
  return None
65
 
66
+ # Load models globally
67
+ MODELS = load_models()
68
+
69
  # ============================================================================
70
+ # PREDICTION FUNCTIONS
71
  # ============================================================================
72
 
73
def make_prediction(text: str, model_choice: str) -> Tuple[Optional[str], Optional[np.ndarray], str]:
    """Classify ``text`` with the model named by ``model_choice``.

    Returns a ``(label, probabilities, status)`` triple:

    * ``label`` is ``'Negative'`` or ``'Positive'``.
    * ``probabilities`` is the first row returned by the model's
      ``predict_proba``.
    * ``status`` is a user-facing message.

    When no prediction can be made (no models loaded, blank text, the
    requested model is unavailable, or any exception is raised while
    predicting), ``label`` and ``probabilities`` are ``None`` and ``status``
    explains why.
    """
    if MODELS is None:
        return None, None, "❌ No models loaded!"

    if not text or not text.strip():
        return None, None, "⚠️ Please enter some text!"

    try:
        # Step 1: work out which estimator to call and what input it expects.
        estimator = None
        model_input = None

        if model_choice == "Logistic Regression":
            if MODELS.get('pipeline_available'):
                # The pipeline vectorizes internally, so it takes raw text.
                estimator = MODELS['pipeline']
                model_input = [text]
            elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                # Stand-alone classifier: vectorize first.
                model_input = MODELS['vectorizer'].transform([text])
                estimator = MODELS['logistic_regression']
        elif model_choice == "Multinomial Naive Bayes":
            if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                model_input = MODELS['vectorizer'].transform([text])
                estimator = MODELS['naive_bayes']

        if estimator is None:
            return None, None, f"❌ Model '{model_choice}' not available!"

        # Step 2: predict once for the resolved estimator.
        predicted_class = estimator.predict(model_input)[0]
        class_probabilities = estimator.predict_proba(model_input)[0]

        # NOTE(review): assumes the models emit integer labels 0/1 that index
        # into this list - confirm against the training code.
        class_names = ['Negative', 'Positive']
        return class_names[predicted_class], class_probabilities, "✅ Prediction successful!"

    except Exception as e:
        # UI boundary: report the failure as a status string instead of raising.
        return None, None, f"Error making prediction: {e}"
 
 
 
114
 
115
def get_available_models() -> List[str]:
    """Return the display names of the models that can currently be used.

    Reads the availability flags stored in the module-level ``MODELS``
    mapping. If ``MODELS`` is ``None`` or no usable combination is flagged,
    the single placeholder entry ``"No models available"`` is returned.
    """
    if MODELS is None:
        return ["No models available"]

    has_vectorizer = MODELS.get('vectorizer_available')
    choices = []

    # Logistic Regression is usable through the full pipeline or through the
    # vectorizer + stand-alone classifier pair.
    if MODELS.get('pipeline_available') or (has_vectorizer and MODELS.get('lr_available')):
        choices.append("Logistic Regression")

    # Naive Bayes has no pipeline form here; it needs the vectorizer.
    if has_vectorizer and MODELS.get('nb_available'):
        choices.append("Multinomial Naive Bayes")

    return choices or ["No models available"]
131
+
132
def create_probability_plot(probabilities: np.ndarray) -> plt.Figure:
    """Build a bar chart of the negative/positive class probabilities.

    Args:
        probabilities: Two values, plotted in order as the 'Negative' and
            'Positive' bars.

    Returns:
        A Matplotlib ``Figure`` containing the chart.
    """
    # Construct the Figure directly instead of via ``plt.subplots()``.
    # pyplot keeps every figure it creates registered until ``plt.close`` is
    # called, and this function hands the figure to the caller without ever
    # closing it, so a long-running server would accumulate one figure per
    # request. A directly constructed Figure is not registered with pyplot
    # and is freed once the caller drops it.
    fig = plt.Figure(figsize=(8, 5))
    ax = fig.subplots()

    classes = ['Negative 😞', 'Positive 😊']
    colors = ['#ff6b6b', '#51cf66']

    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8, edgecolor='white', linewidth=2)

    # Add percentage labels just above each bar.
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold', fontsize=12)

    # Leave headroom above 1.0 so the label of a near-100% bar stays visible.
    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Probability', fontsize=12, fontweight='bold')
    ax.set_title('Sentiment Prediction Probabilities', fontsize=14, fontweight='bold', pad=20)
    ax.grid(axis='y', alpha=0.3)

    # Style improvements
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_facecolor('#f8f9fa')

    # Figure-level call: ``plt.tight_layout()`` would act on pyplot's
    # "current" figure, which this figure is not.
    fig.tight_layout()
    return fig
159
 
160
  # ============================================================================
161
+ # INTERFACE FUNCTIONS
162
  # ============================================================================
163
 
164
def predict_single_text(text: str, model_choice: str) -> Tuple[str, str, str, str, Optional[plt.Figure]]:
    """Run one prediction and format it for the single-text interface.

    Returns ``(result, confidence, probability_details, interpretation,
    plot)``. When ``make_prediction`` yields no prediction, its status
    message is returned in the first slot, the three remaining text slots are
    empty strings and the plot is ``None``.
    """
    prediction, probabilities, status = make_prediction(text, model_choice)

    # Failure path first: surface the status message and blank the rest.
    if not prediction or probabilities is None:
        return status, "", "", "", None

    confidence = max(probabilities)

    headline = f"🎯 **Prediction: {prediction} Sentiment**"
    confidence_line = f"🎯 **Confidence: {confidence:.1%}**"

    # Per-class breakdown (markdown).
    breakdown = f"""
📊 **Detailed Probabilities:**
- 😞 Negative: {probabilities[0]:.1%}
- 😊 Positive: {probabilities[1]:.1%}
"""

    # Bucket the confidence into a plain-language explanation.
    if confidence >= 0.8:
        explanation = "🔥 **High Confidence**: The model is very confident about this prediction."
    elif confidence >= 0.6:
        explanation = "✅ **Medium Confidence**: The model is reasonably confident about this prediction."
    else:
        explanation = "⚠️ **Low Confidence**: The model is uncertain. Consider the context carefully."

    chart = create_probability_plot(probabilities)

    return headline, confidence_line, breakdown, explanation, chart
196
 
197
def _read_batch_texts(file) -> Optional[list]:
    """Return the non-empty texts found in an uploaded .txt or .csv file.

    ``file`` may be a file-like object exposing ``.name`` (and usually
    ``.read``) or a plain path string; which one arrives depends on how the
    upload component is configured. Returns ``None`` when the extension is
    neither .txt nor .csv.
    """
    path = str(getattr(file, "name", file))
    extension_source = path.lower()
    can_read = hasattr(file, "read")

    if extension_source.endswith(".txt"):
        if can_read:
            raw = file.read()
        else:
            # Only a path was supplied, so open it ourselves.
            with open(path, "rb") as handle:
                raw = handle.read()
        content = raw.decode("utf-8") if isinstance(raw, bytes) else raw
        return [line.strip() for line in content.split("\n") if line.strip()]

    if extension_source.endswith(".csv"):
        frame = pd.read_csv(file if can_read else path)
        # Drop empty cells first; otherwise astype(str) turns NaN into the
        # literal text "nan", which would then be classified as a review.
        column = frame.iloc[:, 0].dropna().astype(str).str.strip()
        return [value for value in column.tolist() if value]

    return None


def process_batch_file(file, model_choice: str, max_texts: int = 100) -> Tuple[str, Optional[str]]:
    """Classify every text in an uploaded file and summarise the results.

    Args:
        file: Uploaded .txt (one text per line) or .csv (texts in the first
            column); either a file-like object or a path string.
        model_choice: Model name forwarded to ``make_prediction``.
        max_texts: Upper bound on how many texts are processed. Accepts a
            float (e.g. from a slider) and is truncated to an int.

    Returns:
        ``(summary_markdown, csv_string)``. ``csv_string`` is ``None`` when
        nothing could be processed, in which case the first element is the
        error/warning message.
    """
    if file is None:
        return "⚠️ Please upload a file!", None

    if MODELS is None:
        return "❌ No models loaded!", None

    try:
        texts = _read_batch_texts(file)
        if texts is None:
            return "❌ Unsupported file format! Please use .txt or .csv files.", None

        if not texts:
            return "❌ No text found in file!", None

        # A float cannot be used as a slice bound, and a negative bound would
        # silently trim from the end instead of limiting the batch.
        limit = max(int(max_texts), 0)
        status_msg = ""
        if len(texts) > limit:
            texts = texts[:limit]
            status_msg = f"⚠️ Processing limited to {limit} texts due to size constraints.\n"

        results = []
        confidences = []  # raw values, so the average is not built from rounded strings
        for position, text in enumerate(texts, start=1):
            prediction, probabilities, _ = make_prediction(text, model_choice)
            if not prediction or probabilities is None:
                continue

            confidence = float(max(probabilities))
            confidences.append(confidence)
            results.append({
                'Index': position,
                'Text': text[:100] + "..." if len(text) > 100 else text,
                'Prediction': prediction,
                'Confidence': f"{confidence:.1%}",
                'Negative_Prob': f"{probabilities[0]:.1%}",
                'Positive_Prob': f"{probabilities[1]:.1%}",
            })

        if not results:
            return "❌ No valid texts could be processed!", None

        total = len(results)
        positive_count = sum(1 for row in results if row['Prediction'] == 'Positive')
        negative_count = total - positive_count
        avg_confidence = float(np.mean(confidences))

        summary = f"""
{status_msg}✅ **Successfully processed {total} texts!**

📊 **Summary Statistics:**
- Total Processed: {total}
- 😊 Positive: {positive_count} ({positive_count/total:.1%})
- 😞 Negative: {negative_count} ({negative_count/total:.1%})
- Average Confidence: {avg_confidence:.1%}
"""

        # The caller offers this CSV text as a download.
        csv_string = pd.DataFrame(results).to_csv(index=False)
        return summary, csv_string

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the app.
        return f"❌ Error processing file: {str(e)}", None
271
 
272
def compare_models(text: str) -> Tuple[str, Optional[plt.Figure]]:
    """Run the same text through every available model and compare the results.

    Args:
        text: The text to classify.

    Returns:
        ``(markdown_report, figure)``. The figure holds one bar chart per
        model that produced a prediction. On any failure (no models, empty
        text, fewer than two models, no predictions) the figure is ``None``
        and the first element is the explanatory message.
    """
    if MODELS is None:
        return "❌ No models loaded!", None

    if not text or not text.strip():
        return "⚠️ Please enter some text to compare!", None

    available_models = get_available_models()
    if len(available_models) < 2:
        return "ℹ️ Need at least 2 models for comparison. Only one model available.", None

    comparison_results = []
    for model_name in available_models:
        prediction, probabilities, _ = make_prediction(text, model_name)
        if not prediction or probabilities is None:
            continue
        comparison_results.append({
            'Model': model_name,
            'Prediction': prediction,
            'Confidence': f"{max(probabilities):.1%}",
            'Negative %': f"{probabilities[0]:.1%}",
            'Positive %': f"{probabilities[1]:.1%}",
            'Raw_Probs': probabilities,
        })

    if not comparison_results:
        # Uses the same "❌" prefix as the other error messages in this module.
        return "❌ Failed to get predictions from models!", None

    # Collect the report fragments and join once instead of repeated "+=".
    report = ["🔍 **Model Comparison Results:**\n\n"]
    for result in comparison_results:
        report.append(f"**{result['Model']}:**\n")
        report.append(f"- Prediction: {result['Prediction']}\n")
        report.append(f"- Confidence: {result['Confidence']}\n")
        report.append(f"- Negative: {result['Negative %']}, Positive: {result['Positive %']}\n\n")

    # Agreement analysis: one distinct label means every model agrees.
    predictions = [result['Prediction'] for result in comparison_results]
    if len(set(predictions)) == 1:
        report.append(f"✅ **Perfect Agreement**: All models predict **{predictions[0]} Sentiment**")
    else:
        report.append("⚠️ **Models Disagree** on prediction:\n")
        report.extend(
            f"- {result['Model']}: {result['Prediction']}\n" for result in comparison_results
        )
    comparison_text = "".join(report)

    # squeeze=False always yields a 2-D axes array, so a single surviving
    # model needs no special-casing.
    panel_count = len(comparison_results)
    fig, axes_grid = plt.subplots(1, panel_count, figsize=(6 * panel_count, 5), squeeze=False)

    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']
    for ax, result in zip(axes_grid[0], comparison_results):
        bars = ax.bar(classes, result['Raw_Probs'], color=colors, alpha=0.8)

        # Percentage label just above each bar.
        for bar, prob in zip(bars, result['Raw_Probs']):
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height + 0.02,
                    f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')

        ax.set_ylim(0, 1.1)  # headroom for the labels above full-height bars
        ax.set_title(f"{result['Model']}\n{result['Prediction']}", fontweight='bold')
        ax.grid(axis='y', alpha=0.3)

        # Style
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

    plt.tight_layout()
    return comparison_text, fig
352
+
353
def get_model_info() -> str:
    """Build the Markdown shown on the Model Info tab.

    Returns setup instructions when no models are loaded. Otherwise returns
    a description of each available model, the feature-engineering notes, a
    per-file availability checklist and the training notes.
    """
    if MODELS is None:
        return """
❌ **No models loaded!**

Please ensure you have the following files in the 'models/' directory:
- sentiment_analysis_pipeline.pkl (complete pipeline), OR
- tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR
- tfidf_vectorizer.pkl + multinomial_nb_model.pkl
"""

    has_pipeline = MODELS.get('pipeline_available')
    has_vectorizer = MODELS.get('vectorizer_available')

    sections = [
        "✅ **Models are loaded and ready!**\n\n",
        "🔧 **Available Models:**\n\n",
    ]

    # Logistic Regression is usable via the full pipeline or via vectorizer + classifier.
    if has_pipeline or (has_vectorizer and MODELS.get('lr_available')):
        sections.append("""
**📈 Logistic Regression**
- Type: Linear Classification Model
- Algorithm: Logistic Regression with L2 regularization
- Features: TF-IDF vectors (unigrams + bigrams)
- Strengths: Fast prediction, interpretable, good baseline

""")

    # Naive Bayes is only reported when both the vectorizer and the classifier are available.
    if has_vectorizer and MODELS.get('nb_available'):
        sections.append("""
**🎯 Multinomial Naive Bayes**
- Type: Probabilistic Classification Model
- Algorithm: Multinomial Naive Bayes
- Features: TF-IDF vectors (unigrams + bigrams)
- Strengths: Fast training, works with small datasets

""")

    # Feature engineering
    sections.append("""
🔤 **Feature Engineering:**
- Vectorization: TF-IDF (Term Frequency-Inverse Document Frequency)
- Max Features: 5,000 most important terms
- N-grams: Unigrams (1-word) and Bigrams (2-word phrases)
- Min Document Frequency: 2 (terms must appear in at least 2 documents)
- Stop Words: English stop words removed

""")

    # File status: one checklist row per expected model artifact.
    sections.append("📁 **Model Files Status:**\n\n")
    expected_files = (
        ("sentiment_analysis_pipeline.pkl", "Complete LR Pipeline", 'pipeline_available'),
        ("tfidf_vectorizer.pkl", "TF-IDF Vectorizer", 'vectorizer_available'),
        ("logistic_regression_model.pkl", "LR Classifier", 'lr_available'),
        ("multinomial_nb_model.pkl", "NB Classifier", 'nb_available'),
    )
    for filename, description, flag in expected_files:
        if MODELS.get(flag, False):
            status_icon = "✅"
        else:
            status_icon = "❌"
        sections.append(f"- {filename}: {description} {status_icon}\n")

    sections.append("""

📚 **Training Information:**
- Dataset: Product Review Sentiment Analysis
- Classes: Positive and Negative sentiment
- Preprocessing: Text cleaning, tokenization, TF-IDF vectorization
- Training: Both models trained on same feature set for fair comparison
""")

    return "".join(sections)
425
 
426
  # ============================================================================
427
+ # GRADIO INTERFACE
428
  # ============================================================================
429
 
430
+ def create_interface():
431
+ """Create the main Gradio interface"""
432
+
433
+ # Custom CSS for better styling
434
+ css = """
435
+ .gradio-container {
436
+ font-family: 'Arial', sans-serif;
437
+ }
438
+ .main-header {
439
+ text-align: center;
440
+ color: #1f77b4;
441
+ font-size: 2.5rem;
442
+ margin-bottom: 1rem;
443
+ }
444
+ .tab-nav {
445
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
446
+ }
447
+ """
448
+
449
+ with gr.Blocks(css=css, title="ML Text Classification App", theme=gr.themes.Soft()) as app:
450
+
451
+ # Header
452
+ gr.HTML("""
453
+ <div class="main-header">
454
+ <h1>🤖 ML Text Classification App</h1>
455
+ <p style="font-size: 1.2rem; color: #666;">
456
+ Advanced Sentiment Analysis with Multiple ML Models
457
+ </p>
458
+ </div>
459
+ """)
460
+
461
+ # Main tabbed interface
462
+ with gr.Tabs():
463
 
464
+ # ============================================================================
465
+ # SINGLE PREDICTION TAB
466
+ # ============================================================================
467
+ with gr.Tab("🔮 Single Prediction"):
468
+ gr.Markdown("### Enter text below and select a model to get sentiment predictions")
469
+
470
+ with gr.Row():
471
+ with gr.Column(scale=2):
472
+ model_dropdown = gr.Dropdown(
473
+ choices=get_available_models(),
474
+ value=get_available_models()[0] if get_available_models() else None,
475
+ label="Choose a model",
476
+ info="Select the ML model for prediction"
477
+ )
478
+
479
+ text_input = gr.Textbox(
480
+ lines=5,
481
+ placeholder="Type or paste your text here (e.g., product review, feedback, comment)...",
482
+ label="Enter your text here",
483
+ info="Enter any text you want to analyze for sentiment"
484
+ )
485
+
486
+ # Example texts
487
+ with gr.Row():
488
+ example_btn1 = gr.Button("Example 1", size="sm")
489
+ example_btn2 = gr.Button("Example 2", size="sm")
490
+ example_btn3 = gr.Button("Example 3", size="sm")
491
+
492
+ predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary", size="lg")
493
+
494
+ with gr.Column(scale=2):
495
+ prediction_result = gr.Markdown(label="Prediction Result")
496
+ confidence_result = gr.Markdown(label="Confidence")
497
+ prob_details = gr.Markdown(label="Detailed Probabilities")
498
+ interpretation = gr.Markdown(label="Interpretation")
499
+
500
+ with gr.Row():
501
+ prob_plot = gr.Plot(label="Probability Visualization")
502
+
503
+ # Example text handlers
504
+ example_btn1.click(
505
+ lambda: "This product is absolutely amazing! Best purchase I've made this year.",
506
+ outputs=text_input
507
+ )
508
+ example_btn2.click(
509
+ lambda: "Terrible quality, broke after one day. Complete waste of money.",
510
+ outputs=text_input
511
+ )
512
+ example_btn3.click(
513
+ lambda: "It's okay, nothing special but does the job.",
514
+ outputs=text_input
515
  )
516
 
517
+ # Prediction handler
518
+ predict_btn.click(
519
+ predict_single_text,
520
+ inputs=[text_input, model_dropdown],
521
+ outputs=[prediction_result, confidence_result, prob_details, interpretation, prob_plot]
522
+ )
523
+
524
+ # ============================================================================
525
+ # BATCH PROCESSING TAB
526
+ # ============================================================================
527
+ with gr.Tab("📁 Batch Processing"):
528
+ gr.Markdown("### Upload a text file or CSV to process multiple texts at once")
529
+
530
+ with gr.Row():
531
+ with gr.Column():
532
+ file_upload = gr.File(
533
+ label="Choose a file",
534
+ file_types=[".txt", ".csv"],
535
+ info="Upload a .txt file (one text per line) or .csv file (text in first column)"
536
+ )
537
+
538
+ batch_model_dropdown = gr.Dropdown(
539
+ choices=get_available_models(),
540
+ value=get_available_models()[0] if get_available_models() else None,
541
+ label="Choose model for batch processing"
542
+ )
543
+
544
+ max_texts_slider = gr.Slider(
545
+ minimum=10,
546
+ maximum=1000,
547
+ value=100,
548
+ step=10,
549
+ label="Maximum texts to process",
550
+ info="Limit processing for performance"
551
+ )
552
 
553
+ process_btn = gr.Button("📊 Process File", variant="primary", size="lg")
554
+
555
+ with gr.Column():
556
+ batch_results = gr.Markdown(label="Processing Results")
557
+
558
+ download_file = gr.File(
559
+ label="Download Results",
560
+ visible=False
561
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
+ # File format examples
564
+ with gr.Accordion("📄 Example File Formats", open=False):
565
+ gr.Markdown("""
566
  **Text File (.txt):**
567
  ```
568
  This product is amazing!
 
577
  "Poor quality, not satisfied",review
578
  ```
579
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580
 
581
+ # Batch processing handler
582
def handle_batch_processing(file, model_choice, max_texts):
    """Run batch processing and expose the results CSV as a download.

    Returns ``(summary_markdown, file_component_update)``. The download
    component is shown with the path of a freshly written CSV when results
    exist, and hidden otherwise.
    """
    import tempfile  # local import: only this handler needs it

    # The slider may deliver a float; the batch function expects an int.
    summary, csv_data = process_batch_file(file, model_choice, int(max_texts))

    if not csv_data:
        return summary, gr.update(value=None, visible=False)

    # The File component serves files from disk, so an in-memory StringIO
    # cannot be used as its value; write the CSV to a temp file and return
    # that path. delete=False keeps the file alive for the download.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix="sentiment_results_",
        delete=False,
        encoding="utf-8",
        newline="",  # keep the line endings already present in csv_data
    ) as handle:
        handle.write(csv_data)

    return summary, gr.update(value=handle.name, visible=True)
591
+
592
+ process_btn.click(
593
+ handle_batch_processing,
594
+ inputs=[file_upload, batch_model_dropdown, max_texts_slider],
595
+ outputs=[batch_results, download_file]
596
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
 
598
+ # ============================================================================
599
+ # MODEL COMPARISON TAB
600
+ # ============================================================================
601
+ with gr.Tab("⚖️ Model Comparison"):
602
+ gr.Markdown("### Compare predictions from different models on the same text")
603
+
604
+ with gr.Row():
605
+ with gr.Column():
606
+ comparison_text = gr.Textbox(
607
+ lines=4,
608
+ placeholder="Enter text to see how different models perform...",
609
+ label="Enter text to compare models",
610
+ info="Try texts with mixed sentiment for interesting comparisons"
611
+ )
612
+
613
+ compare_btn = gr.Button("🔍 Compare All Models", variant="primary", size="lg")
614
+
615
+ # Quick examples for comparison
616
+ with gr.Row():
617
+ comp_ex1 = gr.Button("Mixed Example 1", size="sm")
618
+ comp_ex2 = gr.Button("Mixed Example 2", size="sm")
619
+ comp_ex3 = gr.Button("Mixed Example 3", size="sm")
620
+
621
+ with gr.Column():
622
+ comparison_results = gr.Markdown(label="Comparison Results")
623
+
624
+ with gr.Row():
625
+ comparison_plot = gr.Plot(label="Model Comparison Visualization")
626
+
627
+ # Comparison example handlers
628
+ comp_ex1.click(
629
+ lambda: "This movie was okay but not great.",
630
+ outputs=comparison_text
631
+ )
632
+ comp_ex2.click(
633
+ lambda: "The product is fine, I guess.",
634
+ outputs=comparison_text
635
+ )
636
+ comp_ex3.click(
637
+ lambda: "Could be better, could be worse.",
638
+ outputs=comparison_text
639
+ )
640
+
641
+ # Comparison handler
642
+ compare_btn.click(
643
+ compare_models,
644
+ inputs=comparison_text,
645
+ outputs=[comparison_results, comparison_plot]
646
+ )
647
 
648
+ # ============================================================================
649
+ # MODEL INFO TAB
650
+ # ============================================================================
651
+ with gr.Tab("📊 Model Info"):
652
+ model_info_display = gr.Markdown(
653
+ value=get_model_info(),
654
+ label="Model Information"
655
+ )
656
+
657
+ refresh_info_btn = gr.Button("🔄 Refresh Info", size="sm")
658
+ refresh_info_btn.click(
659
+ get_model_info,
660
+ outputs=model_info_display
661
+ )
662
 
663
+ # ============================================================================
664
+ # HELP TAB
665
+ # ============================================================================
666
+ with gr.Tab("❓ Help"):
667
+ gr.Markdown("""
668
+ ## 📚 How to Use This App
669
+
670
+ ### 🔮 Single Prediction
671
+ 1. **Select a model** from the dropdown (Logistic Regression or Multinomial Naive Bayes)
672
+ 2. **Enter text** in the text area (product reviews, comments, feedback)
673
+ 3. **Click 'Analyze Sentiment'** to get sentiment analysis results
674
+ 4. **View results:** prediction, confidence score, and probability breakdown
675
+ 5. **Try examples:** Use the provided example buttons to test the models
676
+
677
+ ### 📁 Batch Processing
678
+ 1. **Prepare your file:**
679
+ - **.txt file:** One text per line
680
+ - **.csv file:** Text in the first column
681
+ 2. **Upload the file** using the file uploader
682
+ 3. **Select a model** for processing
683
+ 4. **Adjust max texts** slider if needed
684
+ 5. **Click 'Process File'** to analyze all texts
685
+ 6. **Download results** as CSV file with predictions and probabilities
686
+
687
+ ### ⚖️ Model Comparison
688
+ 1. **Enter text** you want to analyze
689
+ 2. **Click 'Compare All Models'** to get predictions from both models
690
+ 3. **View comparison results** showing predictions and confidence scores
691
+ 4. **Analyze agreement:** See if models agree or disagree
692
+ 5. **Compare visualizations:** Side-by-side probability charts
693
+
694
+ ### 🔧 Troubleshooting
695
+
696
+ **Models not loading:**
697
+ - Ensure model files (.pkl) are in the 'models/' directory
698
+ - Check that required files exist:
699
+ - tfidf_vectorizer.pkl (required)
700
+ - sentiment_analysis_pipeline.pkl (for LR pipeline)
701
+ - logistic_regression_model.pkl (for LR individual)
702
+ - multinomial_nb_model.pkl (for NB model)
703
+
704
+ **Prediction errors:**
705
+ - Make sure input text is not empty
706
+ - Try shorter texts if getting memory errors
707
+ - Check that text contains readable characters
708
+
709
+ **File upload issues:**
710
+ - Ensure file format is .txt or .csv
711
+ - Check file encoding (should be UTF-8)
712
+ - Verify CSV has text in the first column
713
+
714
+ ### 💻 Project Structure
715
+ ```
716
+ gradio_ml_app/
717
+ ├── app.py # Main application
718
+ ├── requirements.txt # Dependencies
719
+ ├── models/ # Model files
720
+ │ ├── sentiment_analysis_pipeline.pkl # LR complete pipeline
721
+ │ ├── tfidf_vectorizer.pkl # Feature extraction
722
+ │ ├── logistic_regression_model.pkl # LR classifier
723
+ │ └── multinomial_nb_model.pkl # NB classifier
724
+ └── sample_data/ # Sample files
725
+ ├── sample_texts.txt
726
+ └── sample_data.csv
727
+ ```
728
+ """)
729
+
730
+ # Footer
731
+ gr.HTML("""
732
+ <div style='text-align: center; color: #666666; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee;'>
733
+ <p><strong>🤖 ML Text Classification App</strong></p>
734
+ <p>Built with ❤️ using Gradio | Machine Learning Text Classification Demo | By Maaz Amjad</p>
735
+ <p><small>As a part of the courses series <strong>Introduction to Large Language Models/Intro to AI Agents</strong></small></p>
736
+ <p><small>This app demonstrates sentiment analysis using trained ML models</small></p>
737
+ </div>
738
  """)
739
+
740
+ return app
 
741
 
742
  # ============================================================================
743
+ # MAIN EXECUTION
744
  # ============================================================================
745
 
746
if __name__ == "__main__":
    # Report what was loaded before starting the UI.
    if MODELS is not None:
        available_models = get_available_models()
        print(f"✅ Successfully loaded {len(available_models)} model(s): {', '.join(available_models)}")
    else:
        print("⚠️ Warning: No models loaded!")
        print("Please ensure you have the required model files in the 'models/' directory.")

    # Build the Gradio Blocks app.
    app = create_interface()

    # Launch settings, gathered in one place.
    launch_options = {
        "server_name": "0.0.0.0",  # Make accessible from any IP
        "server_port": 7860,       # Default Gradio port
        "share": False,            # Set to True to create public link
        "debug": True,             # Enable debug mode
        "show_error": True,        # Show detailed errors
        "inbrowser": True,         # Open browser automatically
    }
    app.launch(**launch_options)