File size: 4,786 Bytes

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import spacy
import google.generativeai as genai
import json
import os
import dotenv

dotenv.load_dotenv()

# Load spaCy for NER
nlp = spacy.load("en_core_web_sm")

# Load the trained ML model
model_path = "./results/checkpoint-753"  # Replace with the actual path to your model
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()

def setup_gemini():
    genai.configure(api_key=os.getenv("GEMINI_API"))
    model = genai.GenerativeModel('gemini-pro')
    return model

def predict_with_model(text):
    """Predict whether the news is real or fake using the ML model."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_label = torch.argmax(probabilities, dim=-1).item()
    return "FAKE" if predicted_label == 1 else "REAL"

def extract_entities(text):
    """Extract named entities from text using spaCy."""
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

def predict_news(text):
    """Predict whether the news is real or fake using the ML model."""
    # Predict with the ML model
    prediction = predict_with_model(text)
    return prediction

def analyze_content_gemini(model, text):
    prompt = f"""Analyze this news text and return a JSON object with the following structure:
    {{
        "gemini_analysis": {{
            "predicted_classification": "Real or Fake",
            "confidence_score": "0-100",
            "reasoning": ["point1", "point2"]
        }},
        "text_classification": {{
            "category": "",
            "writing_style": "Formal/Informal/Clickbait",
            "target_audience": "",
            "content_type": "news/opinion/editorial"
        }},
        "sentiment_analysis": {{
            "primary_emotion": "",
            "emotional_intensity": "1-10",
            "sensationalism_level": "High/Medium/Low",
            "bias_indicators": ["bias1", "bias2"],
            "tone": {{"formality": "formal/informal", "style": "Professional/Emotional/Neutral"}},
            "emotional_triggers": ["trigger1", "trigger2"]
        }},
        "entity_recognition": {{
            "source_credibility": "High/Medium/Low",
            "people": ["person1", "person2"],
            "organizations": ["org1", "org2"],
            "locations": ["location1", "location2"],
            "dates": ["date1", "date2"],
            "statistics": ["stat1", "stat2"]
        }},
        "context": {{
            "main_narrative": "",
            "supporting_elements": ["element1", "element2"],
            "key_claims": ["claim1", "claim2"],
            "narrative_structure": ""
        }},
        "fact_checking": {{
            "verifiable_claims": ["claim1", "claim2"],
            "evidence_present": "Yes/No",
            "fact_check_score": "0-100"
        }}
    }}

    Analyze this text and return only the JSON response: {text}"""
    
    response = model.generate_content(prompt)
    try:
        cleaned_text = response.text.strip()
        if cleaned_text.startswith('```json'):
            cleaned_text = cleaned_text[7:-3]
        return json.loads(cleaned_text)
    except json.JSONDecodeError:
        return {
            "gemini_analysis": {
                "predicted_classification": "UNCERTAIN",
                "confidence_score": "50",
                "reasoning": ["Analysis failed to generate valid JSON"]
            }
        }

def clean_gemini_output(text):
    """Remove markdown formatting from Gemini output"""
    text = text.replace('##', '')
    text = text.replace('**', '')
    return text

def get_gemini_analysis(text):
    """Get detailed content analysis from Gemini."""
    gemini_model = setup_gemini()
    gemini_analysis = analyze_content_gemini(gemini_model, text)
    return gemini_analysis

def main():
    print("Welcome to the News Classifier!")
    print("Enter your news text below. Type 'Exit' to quit.")
    
    while True:
        news_text = input("\nEnter news text: ")
        
        if news_text.lower() == 'exit':
            print("Thank you for using the News Classifier!")
            return
            
        # Get ML prediction
        prediction = predict_news(news_text)
        print(f"\nML Analysis: {prediction}")
        
        # Get Gemini analysis
        print("\n=== Detailed Gemini Analysis ===")
        gemini_result = get_gemini_analysis(news_text)
        print(gemini_result)

if __name__ == "__main__":
    main()