"""Streamlit front end for the TruthCheck fake-news detector.

The page loads the hybrid classifier from ``src.models.hybrid_model``, runs it
on the text pasted by the user and renders the predicted label, the class
probabilities and the attention weights returned by the model.
"""

import sys
from pathlib import Path

import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import torch
from transformers import BertTokenizer

# Required NLTK data, as (path for ``nltk.data.find``, id for ``nltk.download``).
_NLTK_RESOURCES = (
    ("tokenizers/punkt", "punkt"),
    ("corpora/stopwords", "stopwords"),
    ("tokenizers/punkt_tab", "punkt_tab"),
    ("corpora/wordnet", "wordnet"),
)


def _ensure_nltk_resources():
    """Download every required NLTK resource that is not installed yet."""
    for lookup_path, package in _NLTK_RESOURCES:
        try:
            nltk.data.find(lookup_path)
        except LookupError:
            nltk.download(package)


# Download required NLTK data
_ensure_nltk_resources()

# Add project root to Python path so that the ``src`` package can be imported
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))

from src.models.hybrid_model import HybridFakeNewsDetector  # noqa: E402
from src.config.config import *  # noqa: E402,F401,F403
from src.data.preprocessor import TextPreprocessor  # noqa: E402

# Minimum number of whitespace-separated words required before analysis runs.
# Matches the "at least 10 words" wording of the input-required notice.
_MIN_ARTICLE_WORDS = 10

# Only the first N whitespace-separated tokens are shown in the attention chart.
_MAX_ATTENTION_TOKENS = 20

# Font settings shared by both charts.
_AXIS_TITLE_FONT = dict(size=16, family='Inter', color='#4a5568')
_CHART_TITLE_FONT = {'size': 24, 'family': 'Poppins', 'color': '#1a202c'}

# --- Static page content ----------------------------------------------------
# These strings are passed to ``st.markdown`` and are kept flush-left on
# purpose: Markdown treats lines indented by four or more spaces as code.

_HEADER_NAV = "\n"

_HERO_SECTION = """
⚔ Powered by Advanced AI Technology

🛡️ TruthCheck

Advanced Fake News Detector

🔍 Leverage cutting-edge deep learning technology to instantly analyze and verify news articles. Our hybrid BERT-BiLSTM model delivers precise, trustworthy results with detailed explanations.

95%+ Accuracy
<3s Analysis Time
24/7 Available
"""

_FEATURES_SECTION = """
🚀 Advanced Features

Why Choose TruthCheck?

Our state-of-the-art AI combines multiple advanced technologies to deliver unparalleled accuracy in fake news detection

🤖

BERT Transformer

Utilizes state-of-the-art BERT transformer architecture for deep contextual understanding and semantic analysis of news content with unprecedented accuracy.

🧠

BiLSTM Networks

Advanced bidirectional LSTM networks capture sequential patterns, temporal dependencies, and linguistic structures in news articles for comprehensive analysis.

👁️

Attention Mechanism

Sophisticated attention layers provide transparent insights into model decision-making, highlighting key phrases and suspicious content patterns.

⚔

Real-time Processing

Lightning-fast analysis delivers results in seconds, enabling immediate verification of news content without compromising accuracy or detail.

📊

Confidence Scoring

Detailed confidence metrics and probability distributions provide clear insights into prediction reliability and uncertainty levels.

🔒

Privacy Protected

Your data is processed securely with no storage or tracking. Complete privacy protection ensures your news analysis remains confidential.

"""

_ANALYZE_INTRO = """
🔍 AI Analysis

Analyze News Article

📝 Simply paste any news article below and our advanced AI will provide instant, detailed analysis with confidence scores, attention weights, and comprehensive insights.

"""

_INPUT_PLACEHOLDER = (
    "📰 Paste your news article here for comprehensive AI analysis...\n\n"
    "💡 Tip: Longer articles (100+ words) typically provide more accurate results.\n\n"
    "🚀 Our AI will analyze linguistic patterns, factual consistency, and content structure to determine authenticity."
)

_INPUT_HELP = (
    "Enter the full text of a news article for analysis. "
    "The more complete the article, the more accurate the analysis will be."
)

# ``{confidence}`` is filled in with ``str.format``.
_FAKE_VERDICT_TEMPLATE = """
🚨 FAKE NEWS DETECTED {confidence:.1%}
⚠️ Warning: Our AI model has identified this content as likely misinformation based on linguistic patterns, structural analysis, and content inconsistencies.
"""

_REAL_VERDICT_TEMPLATE = """
✅ AUTHENTIC NEWS {confidence:.1%}
✓ Verified: This content appears to be legitimate news based on professional writing style, factual consistency, and structural integrity.
"""

_ATTENTION_INTRO = """

🧠 The visualization below reveals which words and phrases our AI model focused on during analysis. Higher attention scores (darker colors) indicate words that significantly influenced the prediction.

"""

_FAKE_DETAILS = """

⚠️ Misinformation Indicators

  • Linguistic Anomalies: Detected language patterns commonly associated with fabricated content and misinformation campaigns
  • Structural Inconsistencies: Identified irregular text flow, unusual formatting, or non-standard journalistic structure
  • Content Reliability: Found potential factual inconsistencies, exaggerated claims, or misleading statements
  • Emotional Manipulation: High attention on emotionally charged language designed to provoke strong reactions
  • Source Credibility: Writing style and presentation lack hallmarks of professional journalism

🛡️ Recommended Actions

  • Verify Sources: Cross-reference information with multiple reputable news outlets and official sources
  • Check Facts: Use fact-checking websites like Snopes, PolitiFact, or FactCheck.org for verification
  • Avoid Sharing: Do not share this content until authenticity is confirmed through reliable sources
  • Report Misinformation: Consider reporting to platform moderators if shared on social media
  • Stay Informed: Follow trusted news sources for accurate information on this topic
"""

_REAL_DETAILS = """

✅ Authenticity Indicators

  • Professional Language: Demonstrates standard journalistic writing style with balanced, objective reporting tone
  • Structural Integrity: Follows conventional news article format with proper introduction, body, and conclusion
  • Factual Consistency: Information appears coherent, logically structured, and factually consistent throughout
  • Neutral Presentation: Maintains objectivity without excessive emotional language or bias indicators
  • Credible Content: Contains specific details, proper context, and verifiable information patterns

📋 Best Practices

  • Continue Verification: While likely authentic, always cross-reference important news from multiple sources
  • Check Publication Date: Ensure the information is current and hasn't been superseded by newer developments
  • Verify Author Credentials: Research the author's background and expertise in the subject matter
  • Review Source Reputation: Confirm the publication's credibility and editorial standards
  • Stay Updated: Monitor for any corrections, updates, or follow-up reporting on the topic
"""

# Each bullet sits on its own line so Markdown renders a list.
_MODEL_INFO = """
**🤖 Model Information:**
- **Architecture:** Hybrid BERT + BiLSTM with Attention Mechanism
- **Training Data:** Extensive dataset of verified real and fake news articles
- **Features:** Contextual embeddings, sequential patterns, attention weights
- **Performance:** 95%+ accuracy on validation datasets
"""

# ``{error}`` is filled in with ``str.format``.
_ERROR_TEMPLATE = """
🚨 **Analysis Error Occurred**

We encountered an issue while analyzing your article. This might be due to:
- Technical server issues
- Content formatting problems
- Model loading difficulties

**Error Details:** {error}

Please try again in a few moments or contact support if the issue persists.
"""

_INPUT_REQUIRED = """

⚠️ Input Required

Please enter a news article (at least 10 words) to perform AI analysis.
💡 Tip: Longer, complete articles provide more accurate results.

"""

# Custom CSS for modern, enhanced styling
st.markdown(""" """, unsafe_allow_html=True)


@st.cache_resource
def load_model_and_tokenizer():
    """Load the model and tokenizer (cached).

    Returns:
        A ``(model, tokenizer)`` tuple: the ``HybridFakeNewsDetector`` in eval
        mode with the saved weights applied, and the matching
        ``BertTokenizer``.
    """
    model = HybridFakeNewsDetector(
        bert_model_name=BERT_MODEL_NAME,
        lstm_hidden_size=LSTM_HIDDEN_SIZE,
        lstm_num_layers=LSTM_NUM_LAYERS,
        dropout_rate=DROPOUT_RATE,
    )

    # SECURITY NOTE(review): ``torch.load`` unpickles the file, so the
    # checkpoint must come from a trusted source. Consider passing
    # ``weights_only=True`` once the minimum supported torch version allows it.
    state_dict = torch.load(
        SAVED_MODELS_DIR / "final_model.pt",
        map_location=torch.device('cpu'),
    )

    # Keep only the entries whose names exist in the current architecture.
    # NOTE(review): together with ``strict=False`` this means missing or
    # renamed parameters are silently left at their initial values.
    model_state_dict = model.state_dict()
    filtered_state_dict = {
        k: v for k, v in state_dict.items() if k in model_state_dict
    }
    model.load_state_dict(filtered_state_dict, strict=False)
    model.eval()

    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
    return model, tokenizer


@st.cache_resource
def get_preprocessor():
    """Get the text preprocessor (cached)."""
    return TextPreprocessor()


def predict_news(text):
    """Predict if the given news is fake or real.

    Args:
        text: Raw article text entered by the user.

    Returns:
        A dict with the keys ``prediction`` (argmax class index), ``label``
        (``'FAKE'`` when the index is 1, otherwise ``'REAL'``), ``confidence``
        (largest softmax probability), ``probabilities`` (``'REAL'`` and
        ``'FAKE'`` softmax values) and ``attention_weights`` (NumPy array
        taken from the first item of the model's ``attention_weights`` output).
    """
    model, tokenizer = load_model_and_tokenizer()
    preprocessor = get_preprocessor()

    processed_text = preprocessor.preprocess_text(text)
    encoding = tokenizer.encode_plus(
        processed_text,
        add_special_tokens=True,
        max_length=MAX_SEQUENCE_LENGTH,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    with torch.no_grad():
        outputs = model(encoding['input_ids'], encoding['attention_mask'])
        probabilities = torch.softmax(outputs['logits'], dim=1)
        prediction = torch.argmax(outputs['logits'], dim=1)
        attention_weights = outputs['attention_weights']

    attention_weights_np = attention_weights[0].cpu().numpy()
    predicted_class = prediction.item()

    return {
        'prediction': predicted_class,
        'label': 'FAKE' if predicted_class == 1 else 'REAL',
        'confidence': torch.max(probabilities, dim=1)[0].item(),
        'probabilities': {
            'REAL': probabilities[0][0].item(),
            'FAKE': probabilities[0][1].item(),
        },
        'attention_weights': attention_weights_np,
    }


def _axis_title(text):
    """Return an axis ``title`` dict using the shared axis title font.

    ``title=dict(text=..., font=...)`` replaces the ``titlefont`` axis
    attribute, which is deprecated and removed in recent Plotly releases.
    """
    return dict(text=text, font=_AXIS_TITLE_FONT)


def _chart_title(text):
    """Return a centred figure ``title`` dict using the shared title font."""
    return {
        'text': text,
        'x': 0.5,
        'xanchor': 'center',
        'font': _CHART_TITLE_FONT,
    }


def plot_confidence(probabilities):
    """Plot prediction confidence with enhanced styling.

    Args:
        probabilities: Mapping of class label to probability; keys become the
            bars, values the bar heights.

    Returns:
        A ``plotly.graph_objects.Figure`` containing one bar per class.
    """
    colors = ['#22c55e', '#ef4444']
    labels = list(probabilities.keys())
    values = list(probabilities.values())

    fig = go.Figure(data=[
        go.Bar(
            x=labels,
            y=values,
            text=[f'{p:.1%}' for p in values],
            textposition='auto',
            textfont=dict(size=16, family="Poppins", color="white"),
            marker=dict(
                color=colors,
                line=dict(color='rgba(255,255,255,0.3)', width=2),
                pattern_shape="",
            ),
            # Plotly hover text uses <br> for line breaks.
            hovertemplate='%{x}<br>Confidence: %{y:.1%}',
            width=[0.6, 0.6],
        )
    ])

    fig.update_layout(
        title=_chart_title('📊 Prediction Confidence'),
        xaxis=dict(
            title=_axis_title('Classification'),
            tickfont=dict(size=14, family='Inter', color='#4a5568'),
            showgrid=False,
        ),
        yaxis=dict(
            title=_axis_title('Probability'),
            tickfont=dict(size=14, family='Inter', color='#4a5568'),
            range=[0, 1],
            tickformat='.0%',
            showgrid=True,
            gridcolor='rgba(0,0,0,0.05)',
        ),
        template='plotly_white',
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font={'family': 'Inter'},
        margin=dict(l=50, r=50, t=80, b=50),
        height=400,
    )
    return fig


def plot_attention(text, attention_weights):
    """Plot attention weights with enhanced styling.

    Args:
        text: Text whose whitespace-separated words label the bars.
        attention_weights: Array-like of attention scores; it is flattened to
            one dimension before use.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar per displayed word.
    """
    # NOTE(review): the labels are whitespace-split words of ``text`` while the
    # weights come from the model; whether position i of the weights really
    # corresponds to word i depends on the model's tokenization -- confirm.
    # Limit to the first tokens for better visualization.
    tokens = text.split()[:_MAX_ATTENTION_TOKENS]

    # Flatten before slicing so multi-dimensional input cannot yield more
    # weights than labels, then trim both to a common length.
    weights = np.asarray(attention_weights).flatten()
    shown = min(len(tokens), len(weights))
    tokens = tokens[:shown]
    weights = weights[:shown]

    # Normalize attention weights (only used for the bar opacity).
    peak = weights.max() if shown else 0
    normalized_weights = weights / peak if peak > 0 else weights

    # Create gradient colors: stronger attention gives a more opaque bar.
    colors = [
        f'rgba(102, 126, 234, {0.3 + 0.7 * float(w)})'
        for w in normalized_weights
    ]

    fig = go.Figure(data=[
        go.Bar(
            x=tokens,
            y=weights,
            text=[f'{float(w):.3f}' for w in weights],
            textposition='auto',
            textfont=dict(size=12, family="Inter", color="white"),
            marker=dict(
                color=colors,
                line=dict(color='rgba(102, 126, 234, 0.8)', width=1),
            ),
            # Plotly hover text uses <br> for line breaks.
            hovertemplate='%{x}<br>Attention: %{y:.3f}',
        )
    ])

    fig.update_layout(
        title=_chart_title('🎯 Attention Weights Analysis'),
        xaxis=dict(
            title=_axis_title('Words/Tokens'),
            tickfont=dict(size=12, family='Inter', color='#4a5568'),
            tickangle=45,
            showgrid=False,
        ),
        yaxis=dict(
            title=_axis_title('Attention Score'),
            tickfont=dict(size=14, family='Inter', color='#4a5568'),
            showgrid=True,
            gridcolor='rgba(0,0,0,0.05)',
        ),
        template='plotly_white',
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font={'family': 'Inter'},
        margin=dict(l=50, r=50, t=80, b=100),
        height=450,
    )
    return fig


def _render_spacer():
    """Emit the bare newline block used between page sections."""
    st.markdown("\n", unsafe_allow_html=True)


def _render_verdict(result):
    """Render the verdict banner and the confidence chart side by side."""
    verdict_col, chart_col = st.columns([1, 1], gap="large")

    with verdict_col:
        st.markdown("### 🎯 AI Prediction Result")
        if result['label'] == 'FAKE':
            template = _FAKE_VERDICT_TEMPLATE
        else:
            template = _REAL_VERDICT_TEMPLATE
        st.markdown(
            template.format(confidence=result["confidence"]),
            unsafe_allow_html=True,
        )

    with chart_col:
        st.markdown("### 📈 Confidence Breakdown")
        _render_spacer()
        st.plotly_chart(
            plot_confidence(result['probabilities']),
            use_container_width=True,
        )
        _render_spacer()


def _render_attention(news_text, result):
    """Render the attention explanation and the attention chart."""
    st.markdown("### 🎯 AI Attention Analysis")
    st.markdown(_ATTENTION_INTRO, unsafe_allow_html=True)
    _render_spacer()
    st.plotly_chart(
        plot_attention(news_text, result['attention_weights']),
        use_container_width=True,
    )
    _render_spacer()


def _render_detailed_analysis(result):
    """Render the indicator/advice text matching the predicted label."""
    st.markdown("### 🔍 Comprehensive AI Analysis")
    details = _FAKE_DETAILS if result['label'] == 'FAKE' else _REAL_DETAILS
    st.markdown(details, unsafe_allow_html=True)


def _render_technical_details(result):
    """Render the collapsed expander with raw metrics and model information."""
    with st.expander("🔧 Technical Analysis Details", expanded=False):
        confidence_col, real_col, fake_col = st.columns(3)

        with confidence_col:
            st.metric(
                label="🎯 Prediction Confidence",
                value=f"{result['confidence']:.2%}",
                help="Overall confidence in the AI's prediction",
            )
        with real_col:
            st.metric(
                label="📊 REAL Probability",
                value=f"{result['probabilities']['REAL']:.2%}",
                help="Probability that the content is authentic news",
            )
        with fake_col:
            st.metric(
                label="⚠️ FAKE Probability",
                value=f"{result['probabilities']['FAKE']:.2%}",
                help="Probability that the content is fake news",
            )

        st.markdown("---")
        st.markdown(_MODEL_INFO)


def _render_results(news_text, result):
    """Render every result section for one prediction, top to bottom."""
    # Results Container
    _render_spacer()

    # Main Prediction Result
    _render_verdict(result)

    # Attention Analysis
    _render_attention(news_text, result)

    # Detailed Analysis
    _render_detailed_analysis(result)

    # Technical Details
    _render_technical_details(result)

    _render_spacer()


def _has_enough_words(news_text):
    """Return True when the text has at least ``_MIN_ARTICLE_WORDS`` words."""
    return bool(news_text) and len(news_text.split()) >= _MIN_ARTICLE_WORDS


def main():
    """Render the page and, when requested, analyze the entered article."""
    # Header Navigation
    st.markdown(_HEADER_NAV, unsafe_allow_html=True)

    # Hero Section
    st.markdown(_HERO_SECTION, unsafe_allow_html=True)

    # Features Section
    st.markdown(_FEATURES_SECTION, unsafe_allow_html=True)

    # Main Content Section
    st.markdown(_ANALYZE_INTRO, unsafe_allow_html=True)

    # Input Section
    news_text = st.text_area(
        "",
        height=250,
        placeholder=_INPUT_PLACEHOLDER,
        key="news_input",
        help=_INPUT_HELP,
    )

    _render_spacer()

    # Enhanced Button Section: the middle column keeps the button centred.
    _, button_col, _ = st.columns([1, 2, 1])
    with button_col:
        analyze_button = st.button(
            "🔍 Analyze Article with AI",
            key="analyze_button",
            help="Click to start AI-powered analysis of the news article",
        )

    if analyze_button:
        if _has_enough_words(news_text):
            with st.spinner("🤖 AI is analyzing the article... Please wait"):
                # Top-level UI boundary: any failure is reported on the page
                # instead of crashing the app.
                try:
                    result = predict_news(news_text)
                    _render_results(news_text, result)
                except Exception as e:
                    st.error(_ERROR_TEMPLATE.format(error=str(e)))
        else:
            st.markdown(_INPUT_REQUIRED, unsafe_allow_html=True)

    _render_spacer()

    # Footer
    st.markdown(""" """, unsafe_allow_html=True)


if __name__ == "__main__":
    main()