maazamjad committed (verified)
Commit b4b21b0 · 1 Parent(s): f45c435

Upload 8 files

app.py ADDED
@@ -0,0 +1,743 @@
+ # STREAMLIT ML CLASSIFICATION APP - DUAL MODEL SUPPORT
+ # =====================================================
+
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import joblib
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+
+ # Page Configuration
+ st.set_page_config(
+     page_title="ML Text Classifier",
+     page_icon="🤖",
+     layout="wide",
+     initial_sidebar_state="expanded"
+ )
+
+ # Custom CSS
+ st.markdown("""
+ <style>
+     .main-header {
+         font-size: 2.5rem;
+         color: #1f77b4;
+         text-align: center;
+         margin-bottom: 2rem;
+     }
+     .success-box {
+         padding: 1rem;
+         border-radius: 0.5rem;
+         background-color: #d4edda;
+         border: 1px solid #c3e6cb;
+         margin: 1rem 0;
+     }
+     .metric-card {
+         background-color: #f8f9fa;
+         padding: 1rem;
+         border-radius: 0.5rem;
+         border-left: 4px solid #007bff;
+     }
+ </style>
+ """, unsafe_allow_html=True)
+
+ # ============================================================================
+ # MODEL LOADING SECTION
+ # ============================================================================
+
+ @st.cache_resource
+ def load_models():
+     models = {}
+
+     try:
+         # Load the main pipeline (Logistic Regression)
+         try:
+             models['pipeline'] = joblib.load('models/sentiment_analysis_pipeline.pkl')
+             models['pipeline_available'] = True
+         except FileNotFoundError:
+             models['pipeline_available'] = False
+
+         # Load TF-IDF vectorizer
+         try:
+             models['vectorizer'] = joblib.load('models/tfidf_vectorizer.pkl')
+             models['vectorizer_available'] = True
+         except FileNotFoundError:
+             models['vectorizer_available'] = False
+
+         # Load Logistic Regression model
+         try:
+             models['logistic_regression'] = joblib.load('models/logistic_regression_model.pkl')
+             models['lr_available'] = True
+         except FileNotFoundError:
+             models['lr_available'] = False
+
+         # Load Multinomial Naive Bayes model
+         try:
+             models['naive_bayes'] = joblib.load('models/multinomial_nb_model.pkl')
+             models['nb_available'] = True
+         except FileNotFoundError:
+             models['nb_available'] = False
+
+         # Check if at least one complete setup is available
+         pipeline_ready = models['pipeline_available']
+         individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
+
+         if not (pipeline_ready or individual_ready):
+             st.error("No complete model setup found!")
+             return None
+
+         return models
+
+     except Exception as e:
+         st.error(f"Error loading models: {e}")
+         return None
+
+ # ============================================================================
+ # PREDICTION FUNCTION
+ # ============================================================================
+
+ def make_prediction(text, model_choice, models):
+     """Make prediction using the selected model"""
+     if models is None:
+         return None, None
+
+     try:
+         prediction = None
+         probabilities = None
+
+         if model_choice == "pipeline" and models.get('pipeline_available'):
+             # Use the complete pipeline (Logistic Regression)
+             prediction = models['pipeline'].predict([text])[0]
+             probabilities = models['pipeline'].predict_proba([text])[0]
+
+         elif model_choice == "logistic_regression":
+             if models.get('pipeline_available'):
+                 # Use pipeline for LR
+                 prediction = models['pipeline'].predict([text])[0]
+                 probabilities = models['pipeline'].predict_proba([text])[0]
+             elif models.get('vectorizer_available') and models.get('lr_available'):
+                 # Use individual components
+                 X = models['vectorizer'].transform([text])
+                 prediction = models['logistic_regression'].predict(X)[0]
+                 probabilities = models['logistic_regression'].predict_proba(X)[0]
+
+         elif model_choice == "naive_bayes":
+             if models.get('vectorizer_available') and models.get('nb_available'):
+                 # Use individual components for NB
+                 X = models['vectorizer'].transform([text])
+                 prediction = models['naive_bayes'].predict(X)[0]
+                 probabilities = models['naive_bayes'].predict_proba(X)[0]
+
+         if prediction is not None and probabilities is not None:
+             # Convert to readable format
+             class_names = ['Negative', 'Positive']
+             prediction_label = class_names[prediction]
+             return prediction_label, probabilities
+         else:
+             return None, None
+
+     except Exception as e:
+         st.error(f"Error making prediction: {e}")
+         st.error(f"Model choice: {model_choice}")
+         st.error(f"Available models: {[k for k, v in models.items() if isinstance(v, bool) and v]}")
+         return None, None
+
+ def get_available_models(models):
+     """Get list of available models for selection"""
+     available = []
+
+     if models is None:
+         return available
+
+     if models.get('pipeline_available'):
+         available.append(("logistic_regression", "📈 Logistic Regression (Pipeline)"))
+     elif models.get('vectorizer_available') and models.get('lr_available'):
+         available.append(("logistic_regression", "📈 Logistic Regression (Individual)"))
+
+     if models.get('vectorizer_available') and models.get('nb_available'):
+         available.append(("naive_bayes", "🎯 Multinomial Naive Bayes"))
+
+     return available
+
+ # ============================================================================
+ # SIDEBAR NAVIGATION
+ # ============================================================================
+
+ st.sidebar.title("🧭 Navigation")
+ st.sidebar.markdown("Choose what you want to do:")
+
+ page = st.sidebar.selectbox(
+     "Select Page:",
+     ["🏠 Home", "🔮 Single Prediction", "📁 Batch Processing", "⚖️ Model Comparison", "📊 Model Info", "❓ Help"]
+ )
+
+ # Load models
+ models = load_models()
+
+ # ============================================================================
+ # HOME PAGE
+ # ============================================================================
+
+ if page == "🏠 Home":
+     st.markdown('<h1 class="main-header">🤖 ML Text Classification App</h1>', unsafe_allow_html=True)
+
+     st.markdown("""
+     Welcome to your machine learning web application! This app demonstrates sentiment analysis
+     using multiple trained models: **Logistic Regression** and **Multinomial Naive Bayes**.
+     """)
+
+     # App overview
+     col1, col2, col3 = st.columns(3)
+
+     with col1:
+         st.markdown("""
+         ### 🔮 Single Prediction
+         - Enter text manually
+         - Choose between models
+         - Get instant predictions
+         - See confidence scores
+         """)
+
+     with col2:
+         st.markdown("""
+         ### 📁 Batch Processing
+         - Upload text files
+         - Process multiple texts
+         - Compare model performance
+         - Download results
+         """)
+
+     with col3:
+         st.markdown("""
+         ### ⚖️ Model Comparison
+         - Compare different models
+         - Side-by-side results
+         - Agreement analysis
+         - Performance metrics
+         """)
+
+     # Model status
+     st.subheader("📋 Model Status")
+     if models:
+         st.success("✅ Models loaded successfully!")
+
+         col1, col2, col3 = st.columns(3)
+
+         with col1:
+             if models.get('pipeline_available'):
+                 st.info("**📈 Logistic Regression**\n✅ Pipeline Available")
+             elif models.get('lr_available') and models.get('vectorizer_available'):
+                 st.info("**📈 Logistic Regression**\n✅ Individual Components")
+             else:
+                 st.warning("**📈 Logistic Regression**\n❌ Not Available")
+
+         with col2:
+             if models.get('nb_available') and models.get('vectorizer_available'):
+                 st.info("**🎯 Multinomial NB**\n✅ Available")
+             else:
+                 st.warning("**🎯 Multinomial NB**\n❌ Not Available")
+
+         with col3:
+             if models.get('vectorizer_available'):
+                 st.info("**🔤 TF-IDF Vectorizer**\n✅ Available")
+             else:
+                 st.warning("**🔤 TF-IDF Vectorizer**\n❌ Not Available")
+
+     else:
+         st.error("❌ Models not loaded. Please check model files.")
+
+ # ============================================================================
+ # SINGLE PREDICTION PAGE
+ # ============================================================================
+
+ elif page == "🔮 Single Prediction":
+     st.header("🔮 Make a Single Prediction")
+     st.markdown("Enter text below and select a model to get sentiment predictions.")
+
+     if models:
+         available_models = get_available_models(models)
+
+         if available_models:
+             # Model selection
+             model_choice = st.selectbox(
+                 "Choose a model:",
+                 options=[model[0] for model in available_models],
+                 format_func=lambda x: next(model[1] for model in available_models if model[0] == x)
+             )
+
+             # Text input
+             user_input = st.text_area(
+                 "Enter your text here:",
+                 placeholder="Type or paste your text here (e.g., product review, feedback, comment)...",
+                 height=150
+             )
+
+             # Character count
+             if user_input:
+                 st.caption(f"Character count: {len(user_input)} | Word count: {len(user_input.split())}")
+
+             # Example texts
+             with st.expander("📝 Try these example texts"):
+                 examples = [
+                     "This product is absolutely amazing! Best purchase I've made this year.",
+                     "Terrible quality, broke after one day. Complete waste of money.",
+                     "It's okay, nothing special but does the job.",
+                     "Outstanding customer service and fast delivery. Highly recommend!",
+                     "I love this movie! It's absolutely fantastic and entertaining."
+                 ]
+
+                 col1, col2 = st.columns(2)
+                 for i, example in enumerate(examples):
+                     with col1 if i % 2 == 0 else col2:
+                         if st.button(f"Example {i+1}", key=f"example_{i}"):
+                             st.session_state.user_input = example
+                             st.rerun()
+
+             # Use session state for user input
+             if 'user_input' in st.session_state:
+                 user_input = st.session_state.user_input
+
+             # Prediction button
+             if st.button("🚀 Predict", type="primary"):
+                 if user_input.strip():
+                     with st.spinner('Analyzing sentiment...'):
+                         prediction, probabilities = make_prediction(user_input, model_choice, models)
+
+                         if prediction and probabilities is not None:
+                             # Display prediction
+                             col1, col2 = st.columns([3, 1])
+
+                             with col1:
+                                 if prediction == "Positive":
+                                     st.success(f"🎯 Prediction: **{prediction} Sentiment**")
+                                 else:
+                                     st.error(f"🎯 Prediction: **{prediction} Sentiment**")
+
+                             with col2:
+                                 confidence = max(probabilities)
+                                 st.metric("Confidence", f"{confidence:.1%}")
+
+                             # Create probability chart
+                             st.subheader("📊 Prediction Probabilities")
+
+                             # Detailed probabilities
+                             col1, col2 = st.columns(2)
+                             with col1:
+                                 st.metric("😞 Negative", f"{probabilities[0]:.1%}")
+                             with col2:
+                                 st.metric("😊 Positive", f"{probabilities[1]:.1%}")
+
+                             # Bar chart
+                             class_names = ['Negative', 'Positive']
+                             prob_df = pd.DataFrame({
+                                 'Sentiment': class_names,
+                                 'Probability': probabilities
+                             })
+                             st.bar_chart(prob_df.set_index('Sentiment'), height=300)
+
+                         else:
+                             st.error("Failed to make prediction")
+                 else:
+                     st.warning("Please enter some text to classify!")
+         else:
+             st.error("No models available for prediction.")
+     else:
+         st.warning("Models not loaded. Please check the model files.")
+
+ # ============================================================================
+ # BATCH PROCESSING PAGE
+ # ============================================================================
+
+ elif page == "📁 Batch Processing":
+     st.header("📁 Upload File for Batch Processing")
+     st.markdown("Upload a text file or CSV to process multiple texts at once.")
+
+     if models:
+         available_models = get_available_models(models)
+
+         if available_models:
+             # File upload
+             uploaded_file = st.file_uploader(
+                 "Choose a file",
+                 type=['txt', 'csv'],
+                 help="Upload a .txt file (one text per line) or .csv file (text in first column)"
+             )
+
+             if uploaded_file:
+                 # Model selection
+                 model_choice = st.selectbox(
+                     "Choose model for batch processing:",
+                     options=[model[0] for model in available_models],
+                     format_func=lambda x: next(model[1] for model in available_models if model[0] == x)
+                 )
+
+                 # Process file
+                 if st.button("📊 Process File"):
+                     try:
+                         # Read file content
+                         if uploaded_file.type == "text/plain":
+                             content = str(uploaded_file.read(), "utf-8")
+                             texts = [line.strip() for line in content.split('\n') if line.strip()]
+                         else:  # CSV
+                             df = pd.read_csv(uploaded_file)
+                             texts = df.iloc[:, 0].astype(str).tolist()
+
+                         if not texts:
+                             st.error("No text found in file")
+                         else:
+                             st.info(f"Processing {len(texts)} texts...")
+
+                             # Process all texts
+                             results = []
+                             progress_bar = st.progress(0)
+
+                             for i, text in enumerate(texts):
+                                 if text.strip():
+                                     prediction, probabilities = make_prediction(text, model_choice, models)
+
+                                     if prediction and probabilities is not None:
+                                         results.append({
+                                             'Text': text[:100] + "..." if len(text) > 100 else text,
+                                             'Full_Text': text,
+                                             'Prediction': prediction,
+                                             'Confidence': f"{max(probabilities):.1%}",
+                                             'Negative_Prob': f"{probabilities[0]:.1%}",
+                                             'Positive_Prob': f"{probabilities[1]:.1%}"
+                                         })
+
+                                 progress_bar.progress((i + 1) / len(texts))
+
+                             if results:
+                                 # Display results
+                                 st.success(f"✅ Processed {len(results)} texts successfully!")
+
+                                 results_df = pd.DataFrame(results)
+
+                                 # Summary statistics
+                                 st.subheader("📊 Summary Statistics")
+                                 col1, col2, col3, col4 = st.columns(4)
+
+                                 positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
+                                 negative_count = len(results) - positive_count
+                                 avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])
+
+                                 with col1:
+                                     st.metric("Total Processed", len(results))
+                                 with col2:
+                                     st.metric("😊 Positive", positive_count)
+                                 with col3:
+                                     st.metric("😞 Negative", negative_count)
+                                 with col4:
+                                     st.metric("Avg Confidence", f"{avg_confidence:.1f}%")
+
+                                 # Results preview
+                                 st.subheader("📋 Results Preview")
+                                 st.dataframe(
+                                     results_df[['Text', 'Prediction', 'Confidence']],
+                                     use_container_width=True
+                                 )
+
+                                 # Download option
+                                 csv = results_df.to_csv(index=False)
+                                 st.download_button(
+                                     label="📥 Download Full Results",
+                                     data=csv,
+                                     file_name=f"predictions_{model_choice}_{uploaded_file.name}.csv",
+                                     mime="text/csv"
+                                 )
+                             else:
+                                 st.error("No valid texts could be processed")
+
+                     except Exception as e:
+                         st.error(f"Error processing file: {e}")
+             else:
+                 st.info("Please upload a file to get started.")
+
+                 # Show example file formats
+                 with st.expander("📄 Example File Formats"):
+                     st.markdown("""
+                     **Text File (.txt):**
+                     ```
+                     This product is amazing!
+                     Terrible quality, very disappointed
+                     Great service and fast delivery
+                     ```
+
+                     **CSV File (.csv):**
+                     ```
+                     text,category
+                     "Amazing product, love it!",review
+                     "Poor quality, not satisfied",review
+                     ```
+                     """)
+         else:
+             st.error("No models available for batch processing.")
+     else:
+         st.warning("Models not loaded. Please check the model files.")
+
+ # ============================================================================
+ # MODEL COMPARISON PAGE
+ # ============================================================================
+
+ elif page == "⚖️ Model Comparison":
+     st.header("⚖️ Compare Models")
+     st.markdown("Compare predictions from different models on the same text.")
+
+     if models:
+         available_models = get_available_models(models)
+
+         if len(available_models) >= 2:
+             # Text input for comparison
+             comparison_text = st.text_area(
+                 "Enter text to compare models:",
+                 placeholder="Enter text to see how different models perform...",
+                 height=100
+             )
+
+             if st.button("📊 Compare All Models") and comparison_text.strip():
+                 st.subheader("🔍 Model Comparison Results")
+
+                 # Get predictions from all available models
+                 comparison_results = []
+
+                 for model_key, model_name in available_models:
+                     prediction, probabilities = make_prediction(comparison_text, model_key, models)
+
+                     if prediction and probabilities is not None:
+                         comparison_results.append({
+                             'Model': model_name,
+                             'Prediction': prediction,
+                             'Confidence': f"{max(probabilities):.1%}",
+                             'Negative %': f"{probabilities[0]:.1%}",
+                             'Positive %': f"{probabilities[1]:.1%}",
+                             'Raw_Probs': probabilities
+                         })
+
+                 if comparison_results:
+                     # Comparison table
+                     comparison_df = pd.DataFrame(comparison_results)
+                     st.table(comparison_df[['Model', 'Prediction', 'Confidence', 'Negative %', 'Positive %']])
+
+                     # Agreement analysis
+                     predictions = [r['Prediction'] for r in comparison_results]
+                     if len(set(predictions)) == 1:
+                         st.success(f"✅ All models agree: **{predictions[0]} Sentiment**")
+                     else:
+                         st.warning("⚠️ Models disagree on prediction")
+                         for result in comparison_results:
+                             # Strip the leading emoji so only the model name is shown
+                             model_name = result['Model'].split(' ', 1)[1] if ' ' in result['Model'] else result['Model']
+                             st.write(f"- {model_name}: {result['Prediction']}")
+
+                     # Side-by-side probability charts
+                     st.subheader("📊 Detailed Probability Comparison")
+
+                     cols = st.columns(len(comparison_results))
+
+                     for i, result in enumerate(comparison_results):
+                         with cols[i]:
+                             model_name = result['Model']
+                             st.write(f"**{model_name}**")
+
+                             chart_data = pd.DataFrame({
+                                 'Sentiment': ['Negative', 'Positive'],
+                                 'Probability': result['Raw_Probs']
+                             })
+                             st.bar_chart(chart_data.set_index('Sentiment'))
+
+                 else:
+                     st.error("Failed to get predictions from models")
+
+         elif len(available_models) == 1:
+             st.info("Only one model available. Use Single Prediction page for detailed analysis.")
+
+         else:
+             st.error("No models available for comparison.")
+     else:
+         st.warning("Models not loaded. Please check the model files.")
+
+ # ============================================================================
+ # MODEL INFO PAGE
+ # ============================================================================
+
+ elif page == "📊 Model Info":
+     st.header("📊 Model Information")
+
+     if models:
+         st.success("✅ Models are loaded and ready!")
+
+         # Model details
+         st.subheader("🔧 Available Models")
+
+         col1, col2 = st.columns(2)
+
+         with col1:
+             st.markdown("""
+             ### 📈 Logistic Regression
+             **Type:** Linear Classification Model
+             **Algorithm:** Logistic Regression with L2 regularization
+             **Features:** TF-IDF vectors (unigrams + bigrams)
+
+             **Strengths:**
+             - Fast prediction
+             - Interpretable coefficients
+             - Good baseline performance
+             - Handles sparse features well
+             """)
+
+         with col2:
+             st.markdown("""
+             ### 🎯 Multinomial Naive Bayes
+             **Type:** Probabilistic Classification Model
+             **Algorithm:** Multinomial Naive Bayes
+             **Features:** TF-IDF vectors (unigrams + bigrams)
+
+             **Strengths:**
+             - Fast training and prediction
+             - Works well with small datasets
+             - Good performance on text classification
+             - Natural probabilistic outputs
+             """)
+
+         # Feature engineering info
+         st.subheader("🔤 Feature Engineering")
+         st.markdown("""
+         **Vectorization:** TF-IDF (Term Frequency-Inverse Document Frequency)
+         - **Max Features:** 5,000 most important terms
+         - **N-grams:** Unigrams (1-word) and Bigrams (2-word phrases)
+         - **Min Document Frequency:** 2 (terms must appear in at least 2 documents)
+         - **Stop Words:** English stop words removed
+         """)
+
+         # File status
+         st.subheader("📁 Model Files Status")
+         file_status = []
+
+         files_to_check = [
+             ("sentiment_analysis_pipeline.pkl", "Complete LR Pipeline", models.get('pipeline_available', False)),
+             ("tfidf_vectorizer.pkl", "TF-IDF Vectorizer", models.get('vectorizer_available', False)),
+             ("logistic_regression_model.pkl", "LR Classifier", models.get('lr_available', False)),
+             ("multinomial_nb_model.pkl", "NB Classifier", models.get('nb_available', False))
+         ]
+
+         for filename, description, status in files_to_check:
+             file_status.append({
+                 "File": filename,
+                 "Description": description,
+                 "Status": "✅ Loaded" if status else "❌ Not Found"
+             })
+
+         st.table(pd.DataFrame(file_status))
+
+         # Training information
+         st.subheader("📚 Training Information")
+         st.markdown("""
+         **Dataset:** Product Review Sentiment Analysis
+         - **Classes:** Positive and Negative sentiment
+         - **Preprocessing:** Text cleaning, tokenization, TF-IDF vectorization
+         - **Training:** Both models trained on same feature set for fair comparison
+         """)
+
+     else:
+         st.warning("Models not loaded. Please check model files in the 'models/' directory.")
+
+ # ============================================================================
+ # HELP PAGE
+ # ============================================================================
+
+ elif page == "❓ Help":
+     st.header("❓ How to Use This App")
+
+     with st.expander("🔮 Single Prediction"):
+         st.write("""
+         1. **Select a model** from the dropdown (Logistic Regression or Multinomial Naive Bayes)
+         2. **Enter text** in the text area (product reviews, comments, feedback)
+         3. **Click 'Predict'** to get sentiment analysis results
+         4. **View results:** prediction, confidence score, and probability breakdown
+         5. **Try examples:** Use the provided example texts to test the models
+         """)
+
+     with st.expander("📁 Batch Processing"):
+         st.write("""
+         1. **Prepare your file:**
+            - **.txt file:** One text per line
+            - **.csv file:** Text in the first column
+         2. **Upload the file** using the file uploader
+         3. **Select a model** for processing
+         4. **Click 'Process File'** to analyze all texts
+         5. **Download results** as CSV file with predictions and probabilities
+         """)
+
+     with st.expander("⚖️ Model Comparison"):
+         st.write("""
+         1. **Enter text** you want to analyze
+         2. **Click 'Compare All Models'** to get predictions from both models
+         3. **View comparison table** showing predictions and confidence scores
+         4. **Analyze agreement:** See if models agree or disagree
+         5. **Compare probabilities:** Side-by-side probability charts
+         """)
+
+     with st.expander("🔧 Troubleshooting"):
+         st.write("""
+         **Common Issues and Solutions:**
+
+         **Models not loading:**
+         - Ensure model files (.pkl) are in the 'models/' directory
+         - Check that required files exist:
+           - tfidf_vectorizer.pkl (required)
+           - sentiment_analysis_pipeline.pkl (for LR pipeline)
+           - logistic_regression_model.pkl (for LR individual)
+           - multinomial_nb_model.pkl (for NB model)
+
+         **Prediction errors:**
+         - Make sure input text is not empty
+         - Try shorter texts if getting memory errors
+         - Check that text contains readable characters
+
+         **File upload issues:**
+         - Ensure file format is .txt or .csv
+         - Check file encoding (should be UTF-8)
+         - Verify CSV has text in the first column
+         """)
+
+     # System information
+     st.subheader("💻 Your Project Structure")
+     st.code("""
+     streamlit_ml_app/
+     ├── app.py                              # Main application
+     ├── requirements.txt                    # Dependencies
+     ├── models/                             # Model files
+     │   ├── sentiment_analysis_pipeline.pkl # LR complete pipeline
+     │   ├── tfidf_vectorizer.pkl            # Feature extraction
+     │   ├── logistic_regression_model.pkl   # LR classifier
+     │   └── multinomial_nb_model.pkl        # NB classifier
+     └── sample_data/                        # Sample files
+         ├── sample_texts.txt
+         └── sample_data.csv
+     """)
+
+ # ============================================================================
+ # FOOTER
+ # ============================================================================
+
+ st.sidebar.markdown("---")
+ st.sidebar.markdown("### 📚 App Information")
+ st.sidebar.info("""
+ **ML Text Classification App**
+ Built with Streamlit
+
+ **Models:**
+ - 📈 Logistic Regression
+ - 🎯 Multinomial Naive Bayes
+
+ **Framework:** scikit-learn
+ **Deployment:** Streamlit Cloud Ready
+ """)
+
+ st.markdown("---")
+ st.markdown("""
+ <div style='text-align: center; color: #666666;'>
+ Built with ❤️ using Streamlit | Machine Learning Text Classification Demo | By Maaz Amjad<br>
+ <small>As part of the course series <b>Introduction to Large Language Models/Intro to AI Agents</b></small><br>
+ <small>This app demonstrates sentiment analysis using trained ML models</small>
+ </div>
+ """, unsafe_allow_html=True)
models/logistic_regression_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff7b896674af8ee1f59d0083d22b64192a2dd82209ac370cd01d865bc1b978e3
+ size 40891
models/multinomial_nb_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce5f54b34588adb0c2037c8041e40c5273131f568d71b8926de89bfb8b537d77
+ size 160791
models/sentiment_analysis_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7089a97828098e306dd1b930d6aa1ed71bd3d2798c2f1cc0be81dd73648062c
+ size 227104
models/tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:918d6daf0b27953fc64e946d67979859267fe6e69897cceaf0fa944a33873bd1
+ size 186359
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ pandas>=2.0.0
+ numpy>=1.26.0
+ scikit-learn>=1.4.0
+ matplotlib>=3.7.1
+ seaborn>=0.12.2
+ plotly>=5.15.0
+ joblib>=1.3.2
+ streamlit
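With these dependencies installed (`pip install -r requirements.txt`), the app runs locally via `streamlit run app.py`.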
sample_data/sample_data.csv ADDED
@@ -0,0 +1,11 @@
+ text,category
+ This is an amazing product! Love it!,review
+ "Terrible quality, very disappointed",review
+ Great customer service experience,review
+ Worst movie I've ever seen,review
+ Outstanding performance and quality,review
+ The app crashes constantly,review
+ Highly recommend to everyone,review
+ Poor value for money,review
+ Excellent build quality,review
+ Not satisfied with purchase,review
sample_data/sample_texts.txt ADDED
@@ -0,0 +1,10 @@
+ I love this movie! It's absolutely fantastic and entertaining.
+ This product is terrible. Worst purchase I've ever made.
+ The weather is nice today, perfect for a walk.
+ Outstanding customer service! Highly recommend this company.
+ I'm so disappointed with this experience. Never again.
+ Great quality and fast delivery. Very satisfied!
+ The food was okay, nothing special but edible.
+ Amazing product! Exceeded all my expectations completely.
+ Poor quality materials and awful customer support service.
+ Perfect solution to my problem. Thank you so much!