File size: 4,786 Bytes
990f77e
 
 
 
 
b6597a0
 
 
 
990f77e
 
 
 
 
948df4b
990f77e
 
 
 
 
b6597a0
990f77e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4adafc2
990f77e
4adafc2
 
990f77e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4adafc2
990f77e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4adafc2
990f77e
4adafc2
990f77e
4adafc2
990f77e
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import spacy
import google.generativeai as genai
import json
import os
import dotenv

# Load variables from a local .env file into the process environment.
# setup_gemini() later reads GEMINI_API via os.getenv.
dotenv.load_dotenv()

# Load spaCy for NER (used by extract_entities). Runs at import time.
nlp = spacy.load("en_core_web_sm")

# Load the trained ML model (also at import time, so importing this module
# requires the checkpoint directory below to exist).
model_path = "./results/checkpoint-753"  # Replace with the actual path to your model
# NOTE(review): the tokenizer is loaded from the base hub name rather than
# from model_path — presumably the checkpoint was fine-tuned from this base
# model; confirm the two match.
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
model = AutoModelForSequenceClassification.from_pretrained(model_path)
# Switch the classifier to evaluation mode; it is only used for inference here.
model.eval()

def setup_gemini():
    """Configure the Gemini SDK and return a 'gemini-pro' generative model.

    The API key is read from the ``GEMINI_API`` environment variable.
    """
    api_key = os.getenv("GEMINI_API")
    genai.configure(api_key=api_key)
    return genai.GenerativeModel('gemini-pro')

def predict_with_model(text):
    """Classify *text* with the module-level tokenizer and classifier.

    The input is tokenized with truncation at 512 tokens. Returns "FAKE" when
    the highest-scoring class index is 1, otherwise "REAL".
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Inference only: no gradient tracking needed for the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits
    class_probs = torch.softmax(logits, dim=-1)
    top_class = class_probs.argmax(dim=-1).item()
    if top_class == 1:
        return "FAKE"
    return "REAL"

def extract_entities(text):
    """Run the spaCy pipeline on *text* and return ``(entity_text, label)`` tuples."""
    return [(span.text, span.label_) for span in nlp(text).ents]

def predict_news(text):
    """Return the ML model's "REAL"/"FAKE" verdict for *text*.

    Thin wrapper that delegates to ``predict_with_model``.
    """
    return predict_with_model(text)

def _strip_code_fence(raw):
    """Return *raw* without a surrounding markdown code fence, if it has one."""
    cleaned = raw.strip()
    if not cleaned.startswith('```'):
        return cleaned
    cleaned = cleaned[3:]
    # Optional language tag right after the opening fence (```json / ```JSON).
    # Valid JSON can never start with the letters "json", so this is safe.
    if cleaned[:4].lower() == 'json':
        cleaned = cleaned[4:]
    # Only drop the closing fence when it is really there; blindly slicing
    # off the last three characters would corrupt an unterminated reply.
    if cleaned.endswith('```'):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def analyze_content_gemini(model, text):
    """Ask a Gemini model for a structured JSON analysis of a news text.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute (e.g. the model returned by ``setup_gemini``).
        text: News text to analyze. It is interpolated verbatim into the prompt.

    Returns:
        The value parsed from the model's JSON reply (a markdown code fence
        around the reply is removed first). If the reply has no usable text
        or is not valid JSON, a fallback dict containing only a
        ``gemini_analysis`` entry marked "UNCERTAIN" is returned instead.
    """
    prompt = f"""Analyze this news text and return a JSON object with the following structure:
    {{
        "gemini_analysis": {{
            "predicted_classification": "Real or Fake",
            "confidence_score": "0-100",
            "reasoning": ["point1", "point2"]
        }},
        "text_classification": {{
            "category": "",
            "writing_style": "Formal/Informal/Clickbait",
            "target_audience": "",
            "content_type": "news/opinion/editorial"
        }},
        "sentiment_analysis": {{
            "primary_emotion": "",
            "emotional_intensity": "1-10",
            "sensationalism_level": "High/Medium/Low",
            "bias_indicators": ["bias1", "bias2"],
            "tone": {{"formality": "formal/informal", "style": "Professional/Emotional/Neutral"}},
            "emotional_triggers": ["trigger1", "trigger2"]
        }},
        "entity_recognition": {{
            "source_credibility": "High/Medium/Low",
            "people": ["person1", "person2"],
            "organizations": ["org1", "org2"],
            "locations": ["location1", "location2"],
            "dates": ["date1", "date2"],
            "statistics": ["stat1", "stat2"]
        }},
        "context": {{
            "main_narrative": "",
            "supporting_elements": ["element1", "element2"],
            "key_claims": ["claim1", "claim2"],
            "narrative_structure": ""
        }},
        "fact_checking": {{
            "verifiable_claims": ["claim1", "claim2"],
            "evidence_present": "Yes/No",
            "fact_check_score": "0-100"
        }}
    }}

    Analyze this text and return only the JSON response: {text}"""

    response = model.generate_content(prompt)
    try:
        return json.loads(_strip_code_fence(response.text))
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass, so this covers invalid
        # JSON as well as the ValueError the SDK's ``response.text`` accessor
        # raises when the response carries no text part (e.g. blocked output).
        return {
            "gemini_analysis": {
                "predicted_classification": "UNCERTAIN",
                "confidence_score": "50",
                "reasoning": ["Analysis failed to generate valid JSON"]
            }
        }

def clean_gemini_output(text):
    """Strip the markdown markers ``##`` and ``**`` from Gemini output."""
    # Markers are removed in this fixed order: '##' first, then '**'.
    for marker in ('##', '**'):
        text = text.replace(marker, '')
    return text

def get_gemini_analysis(text):
    """Create a Gemini model via ``setup_gemini`` and return its analysis of *text*.

    The result is whatever ``analyze_content_gemini`` returns for that model.
    """
    return analyze_content_gemini(setup_gemini(), text)

def main():
    """Run the interactive console loop of the news classifier.

    Repeatedly prompts for a news text, prints the ML model's verdict and
    then the Gemini analysis. The loop ends when the user types "exit"
    (case-insensitive, surrounding whitespace ignored) or when standard
    input is closed. Blank input is ignored and the prompt is shown again.
    """
    print("Welcome to the News Classifier!")
    print("Enter your news text below. Type 'Exit' to quit.")

    while True:
        try:
            news_text = input("\nEnter news text: ")
        except EOFError:
            # stdin was closed (piped input exhausted or Ctrl-D): exit cleanly
            # instead of surfacing a traceback.
            print("\nThank you for using the News Classifier!")
            return

        # Strip only for command detection; the text handed to the models
        # below is exactly what the user typed.
        command = news_text.strip()
        if not command:
            # Nothing to classify; avoid a pointless model run and API call.
            continue
        if command.lower() == 'exit':
            print("Thank you for using the News Classifier!")
            return

        # Get ML prediction
        prediction = predict_news(news_text)
        print(f"\nML Analysis: {prediction}")

        # Get Gemini analysis
        print("\n=== Detailed Gemini Analysis ===")
        gemini_result = get_gemini_analysis(news_text)
        print(gemini_result)

# Start the interactive loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()