import streamlit as st
import torch
import pandas as pd
import numpy as np
from pathlib import Path
import sys
import plotly.express as px
import plotly.graph_objects as go
from transformers import BertTokenizer
import nltk
# Download required NLTK data
try:
nltk.data.find('tokenizers/punkt')
except LookupError:
nltk.download('punkt')
try:
nltk.data.find('corpora/stopwords')
except LookupError:
nltk.download('stopwords')
try:
nltk.data.find('tokenizers/punkt_tab')
except LookupError:
nltk.download('punkt_tab')
try:
nltk.data.find('corpora/wordnet')
except LookupError:
nltk.download('wordnet')
# Add project root to Python path
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))
from src.models.hybrid_model import HybridFakeNewsDetector
from src.config.config import *
from src.data.preprocessor import TextPreprocessor
# Custom CSS for modern, enhanced styling
st.markdown("""
""", unsafe_allow_html=True)
@st.cache_resource
def load_model_and_tokenizer():
"""Load the model and tokenizer (cached)."""
model = HybridFakeNewsDetector(
bert_model_name=BERT_MODEL_NAME,
lstm_hidden_size=LSTM_HIDDEN_SIZE,
lstm_num_layers=LSTM_NUM_LAYERS,
dropout_rate=DROPOUT_RATE
)
state_dict = torch.load(SAVED_MODELS_DIR / "final_model.pt", map_location=torch.device('cpu'))
model_state_dict = model.state_dict()
filtered_state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict}
model.load_state_dict(filtered_state_dict, strict=False)
model.eval()
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
return model, tokenizer
@st.cache_resource
def get_preprocessor():
"""Get the text preprocessor (cached)."""
return TextPreprocessor()
def predict_news(text):
"""Predict if the given news is fake or real."""
model, tokenizer = load_model_and_tokenizer()
preprocessor = get_preprocessor()
processed_text = preprocessor.preprocess_text(text)
encoding = tokenizer.encode_plus(
processed_text,
add_special_tokens=True,
max_length=MAX_SEQUENCE_LENGTH,
padding='max_length',
truncation=True,
return_attention_mask=True,
return_tensors='pt'
)
with torch.no_grad():
outputs = model(
encoding['input_ids'],
encoding['attention_mask']
)
probabilities = torch.softmax(outputs['logits'], dim=1)
prediction = torch.argmax(outputs['logits'], dim=1)
attention_weights = outputs['attention_weights']
attention_weights_np = attention_weights[0].cpu().numpy()
return {
'prediction': prediction.item(),
'label': 'FAKE' if prediction.item() == 1 else 'REAL',
'confidence': torch.max(probabilities, dim=1)[0].item(),
'probabilities': {
'REAL': probabilities[0][0].item(),
'FAKE': probabilities[0][1].item()
},
'attention_weights': attention_weights_np
}
def plot_confidence(probabilities):
"""Plot prediction confidence with enhanced styling."""
colors = ['#22c55e', '#ef4444']
fig = go.Figure(data=[
go.Bar(
x=list(probabilities.keys()),
y=list(probabilities.values()),
text=[f'{p:.1%}' for p in probabilities.values()],
textposition='auto',
textfont=dict(size=16, family="Poppins", color="white"),
marker=dict(
color=colors,
line=dict(color='rgba(255,255,255,0.3)', width=2),
pattern_shape="",
),
hovertemplate='%{x}
Confidence: %{y:.1%}',
width=[0.6, 0.6]
)
])
fig.update_layout(
title={
'text': 'š Prediction Confidence',
'x': 0.5,
'xanchor': 'center',
'font': {'size': 24, 'family': 'Poppins', 'color': '#1a202c'}
},
xaxis=dict(
title='Classification',
titlefont=dict(size=16, family='Inter', color='#4a5568'),
tickfont=dict(size=14, family='Inter', color='#4a5568'),
showgrid=False,
),
yaxis=dict(
title='Probability',
titlefont=dict(size=16, family='Inter', color='#4a5568'),
tickfont=dict(size=14, family='Inter', color='#4a5568'),
range=[0, 1],
tickformat='.0%',
showgrid=True,
gridcolor='rgba(0,0,0,0.05)',
),
template='plotly_white',
plot_bgcolor='rgba(0,0,0,0)',
paper_bgcolor='rgba(0,0,0,0)',
font={'family': 'Inter'},
margin=dict(l=50, r=50, t=80, b=50),
height=400
)
return fig
def plot_attention(text, attention_weights):
"""Plot attention weights with enhanced styling."""
tokens = text.split()[:20] # Limit to first 20 tokens for better visualization
attention_weights = attention_weights[:len(tokens)]
if isinstance(attention_weights, (list, np.ndarray)):
attention_weights = np.array(attention_weights).flatten()
# Normalize attention weights
if len(attention_weights) > 0 and max(attention_weights) > 0:
normalized_weights = attention_weights / max(attention_weights)
else:
normalized_weights = attention_weights
# Create gradient colors
colors = [f'rgba(102, 126, 234, {0.3 + 0.7 * float(w)})' for w in normalized_weights]
fig = go.Figure(data=[
go.Bar(
x=tokens,
y=attention_weights,
text=[f'{float(w):.3f}' for w in attention_weights],
textposition='auto',
textfont=dict(size=12, family="Inter", color="white"),
marker=dict(
color=colors,
line=dict(color='rgba(102, 126, 234, 0.8)', width=1),
),
hovertemplate='%{x}
Attention: %{y:.3f}',
)
])
fig.update_layout(
title={
'text': 'šÆ Attention Weights Analysis',
'x': 0.5,
'xanchor': 'center',
'font': {'size': 24, 'family': 'Poppins', 'color': '#1a202c'}
},
xaxis=dict(
title='Words/Tokens',
titlefont=dict(size=16, family='Inter', color='#4a5568'),
tickfont=dict(size=12, family='Inter', color='#4a5568'),
tickangle=45,
showgrid=False,
),
yaxis=dict(
title='Attention Score',
titlefont=dict(size=16, family='Inter', color='#4a5568'),
tickfont=dict(size=14, family='Inter', color='#4a5568'),
showgrid=True,
gridcolor='rgba(0,0,0,0.05)',
),
template='plotly_white',
plot_bgcolor='rgba(0,0,0,0)',
paper_bgcolor='rgba(0,0,0,0)',
font={'family': 'Inter'},
margin=dict(l=50, r=50, t=80, b=100),
height=450
)
return fig
def main():
# Header Navigation
st.markdown("""
""", unsafe_allow_html=True)
# Hero Section
st.markdown("""
ā” Powered by Advanced AI Technology
š”ļø TruthCheck
Advanced Fake News Detector
š Leverage cutting-edge deep learning technology to instantly analyze and verify news articles.
Our hybrid BERT-BiLSTM model delivers precise, trustworthy results with detailed explanations.
95%+
Accuracy
<3s
Analysis Time
24/7
Available
""", unsafe_allow_html=True)
# Features Section
st.markdown("""
š¤
BERT Transformer
Utilizes state-of-the-art BERT transformer architecture for deep contextual understanding and semantic analysis of news content with unprecedented accuracy.
š§
BiLSTM Networks
Advanced bidirectional LSTM networks capture sequential patterns, temporal dependencies, and linguistic structures in news articles for comprehensive analysis.
šļø
Attention Mechanism
Sophisticated attention layers provide transparent insights into model decision-making, highlighting key phrases and suspicious content patterns.
ā”
Real-time Processing
Lightning-fast analysis delivers results in seconds, enabling immediate verification of news content without compromising accuracy or detail.
š
Confidence Scoring
Detailed confidence metrics and probability distributions provide clear insights into prediction reliability and uncertainty levels.
š
Privacy Protected
Your data is processed securely with no storage or tracking. Complete privacy protection ensures your news analysis remains confidential.
""", unsafe_allow_html=True)
# Main Content Section
st.markdown("""
""", unsafe_allow_html=True)
# Input Section
news_text = st.text_area(
"",
height=250,
placeholder="š° Paste your news article here for comprehensive AI analysis...\n\nš” Tip: Longer articles (100+ words) typically provide more accurate results.\n\nš Our AI will analyze linguistic patterns, factual consistency, and content structure to determine authenticity.",
key="news_input",
help="Enter the full text of a news article for analysis. The more complete the article, the more accurate the analysis will be."
)
st.markdown("
", unsafe_allow_html=True)
# Enhanced Button Section
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
analyze_button = st.button(
"š Analyze Article with AI",
key="analyze_button",
help="Click to start AI-powered analysis of the news article"
)
if analyze_button:
if news_text and len(news_text.strip()) > 10:
with st.spinner("š¤ AI is analyzing the article... Please wait"):
try:
result = predict_news(news_text)
# Results Container
st.markdown('
', unsafe_allow_html=True)
# Main Prediction Result
col1, col2 = st.columns([1, 1], gap="large")
with col1:
st.markdown("### šÆ AI Prediction Result")
if result['label'] == 'FAKE':
st.markdown(f'''
šØ FAKE NEWS DETECTED
{result["confidence"]:.1%}
ā ļø Warning: Our AI model has identified this content as likely misinformation based on linguistic patterns, structural analysis, and content inconsistencies.
''', unsafe_allow_html=True)
else:
st.markdown(f'''
ā
AUTHENTIC NEWS
{result["confidence"]:.1%}
ā Verified: This content appears to be legitimate news based on professional writing style, factual consistency, and structural integrity.
''', unsafe_allow_html=True)
with col2:
st.markdown("### š Confidence Breakdown")
st.markdown('
', unsafe_allow_html=True)
st.plotly_chart(plot_confidence(result['probabilities']), use_container_width=True)
st.markdown('
', unsafe_allow_html=True)
# Attention Analysis
st.markdown("### šÆ AI Attention Analysis")
st.markdown("""
š§ The visualization below reveals which words and phrases our AI model focused on during analysis.
Higher attention scores (darker colors) indicate words that significantly influenced the prediction.
""", unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
st.plotly_chart(plot_attention(news_text, result['attention_weights']), use_container_width=True)
st.markdown('
', unsafe_allow_html=True)
# Detailed Analysis
st.markdown("### š Comprehensive AI Analysis")
if result['label'] == 'FAKE':
st.markdown("""
ā ļø Misinformation Indicators
- Linguistic Anomalies: Detected language patterns commonly associated with fabricated content and misinformation campaigns
- Structural Inconsistencies: Identified irregular text flow, unusual formatting, or non-standard journalistic structure
- Content Reliability: Found potential factual inconsistencies, exaggerated claims, or misleading statements
- Emotional Manipulation: High attention on emotionally charged language designed to provoke strong reactions
- Source Credibility: Writing style and presentation lack hallmarks of professional journalism
š”ļø Recommended Actions
- Verify Sources: Cross-reference information with multiple reputable news outlets and official sources
- Check Facts: Use fact-checking websites like Snopes, PolitiFact, or FactCheck.org for verification
- Avoid Sharing: Do not share this content until authenticity is confirmed through reliable sources
- Report Misinformation: Consider reporting to platform moderators if shared on social media
- Stay Informed: Follow trusted news sources for accurate information on this topic
""", unsafe_allow_html=True)
else:
st.markdown("""
ā
Authenticity Indicators
- Professional Language: Demonstrates standard journalistic writing style with balanced, objective reporting tone
- Structural Integrity: Follows conventional news article format with proper introduction, body, and conclusion
- Factual Consistency: Information appears coherent, logically structured, and factually consistent throughout
- Neutral Presentation: Maintains objectivity without excessive emotional language or bias indicators
- Credible Content: Contains specific details, proper context, and verifiable information patterns
š Best Practices
- Continue Verification: While likely authentic, always cross-reference important news from multiple sources
- Check Publication Date: Ensure the information is current and hasn't been superseded by newer developments
- Verify Author Credentials: Research the author's background and expertise in the subject matter
- Review Source Reputation: Confirm the publication's credibility and editorial standards
- Stay Updated: Monitor for any corrections, updates, or follow-up reporting on the topic
""", unsafe_allow_html=True)
# Technical Details
with st.expander("š§ Technical Analysis Details", expanded=False):
col1, col2, col3 = st.columns(3)
with col1:
st.metric(
label="šÆ Prediction Confidence",
value=f"{result['confidence']:.2%}",
help="Overall confidence in the AI's prediction"
)
with col2:
st.metric(
label="š REAL Probability",
value=f"{result['probabilities']['REAL']:.2%}",
help="Probability that the content is authentic news"
)
with col3:
st.metric(
label="ā ļø FAKE Probability",
value=f"{result['probabilities']['FAKE']:.2%}",
help="Probability that the content is fake news"
)
st.markdown("---")
st.markdown("""
**š¤ Model Information:**
- **Architecture:** Hybrid BERT + BiLSTM with Attention Mechanism
- **Training Data:** Extensive dataset of verified real and fake news articles
- **Features:** Contextual embeddings, sequential patterns, attention weights
- **Performance:** 95%+ accuracy on validation datasets
""")
st.markdown('
', unsafe_allow_html=True)
except Exception as e:
st.error(f"""
šØ **Analysis Error Occurred**
We encountered an issue while analyzing your article. This might be due to:
- Technical server issues
- Content formatting problems
- Model loading difficulties
**Error Details:** {str(e)}
Please try again in a few moments or contact support if the issue persists.
""")
else:
st.markdown('''
ā ļø Input Required
Please enter a news article (at least 10 words) to perform AI analysis.
š” Tip: Longer, complete articles provide more accurate results.
''', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
# Footer
st.markdown("""
""", unsafe_allow_html=True)
if __name__ == "__main__":
main()