Fact_Checker / final.py
Krish Patel
Changed ML Model
948df4b
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import spacy
import google.generativeai as genai
import json
import os
import dotenv
dotenv.load_dotenv()
# Load spaCy for NER
nlp = spacy.load("en_core_web_sm")
# Load the trained ML model
model_path = "./results/checkpoint-753" # Replace with the actual path to your model
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()
def setup_gemini():
genai.configure(api_key=os.getenv("GEMINI_API"))
model = genai.GenerativeModel('gemini-pro')
return model
def predict_with_model(text):
"""Predict whether the news is real or fake using the ML model."""
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
with torch.no_grad():
outputs = model(**inputs)
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
predicted_label = torch.argmax(probabilities, dim=-1).item()
return "FAKE" if predicted_label == 1 else "REAL"
def extract_entities(text):
"""Extract named entities from text using spaCy."""
doc = nlp(text)
entities = [(ent.text, ent.label_) for ent in doc.ents]
return entities
def predict_news(text):
"""Predict whether the news is real or fake using the ML model."""
# Predict with the ML model
prediction = predict_with_model(text)
return prediction
def analyze_content_gemini(model, text):
prompt = f"""Analyze this news text and return a JSON object with the following structure:
{{
"gemini_analysis": {{
"predicted_classification": "Real or Fake",
"confidence_score": "0-100",
"reasoning": ["point1", "point2"]
}},
"text_classification": {{
"category": "",
"writing_style": "Formal/Informal/Clickbait",
"target_audience": "",
"content_type": "news/opinion/editorial"
}},
"sentiment_analysis": {{
"primary_emotion": "",
"emotional_intensity": "1-10",
"sensationalism_level": "High/Medium/Low",
"bias_indicators": ["bias1", "bias2"],
"tone": {{"formality": "formal/informal", "style": "Professional/Emotional/Neutral"}},
"emotional_triggers": ["trigger1", "trigger2"]
}},
"entity_recognition": {{
"source_credibility": "High/Medium/Low",
"people": ["person1", "person2"],
"organizations": ["org1", "org2"],
"locations": ["location1", "location2"],
"dates": ["date1", "date2"],
"statistics": ["stat1", "stat2"]
}},
"context": {{
"main_narrative": "",
"supporting_elements": ["element1", "element2"],
"key_claims": ["claim1", "claim2"],
"narrative_structure": ""
}},
"fact_checking": {{
"verifiable_claims": ["claim1", "claim2"],
"evidence_present": "Yes/No",
"fact_check_score": "0-100"
}}
}}
Analyze this text and return only the JSON response: {text}"""
response = model.generate_content(prompt)
try:
cleaned_text = response.text.strip()
if cleaned_text.startswith('```json'):
cleaned_text = cleaned_text[7:-3]
return json.loads(cleaned_text)
except json.JSONDecodeError:
return {
"gemini_analysis": {
"predicted_classification": "UNCERTAIN",
"confidence_score": "50",
"reasoning": ["Analysis failed to generate valid JSON"]
}
}
def clean_gemini_output(text):
"""Remove markdown formatting from Gemini output"""
text = text.replace('##', '')
text = text.replace('**', '')
return text
def get_gemini_analysis(text):
"""Get detailed content analysis from Gemini."""
gemini_model = setup_gemini()
gemini_analysis = analyze_content_gemini(gemini_model, text)
return gemini_analysis
def main():
print("Welcome to the News Classifier!")
print("Enter your news text below. Type 'Exit' to quit.")
while True:
news_text = input("\nEnter news text: ")
if news_text.lower() == 'exit':
print("Thank you for using the News Classifier!")
return
# Get ML prediction
prediction = predict_news(news_text)
print(f"\nML Analysis: {prediction}")
# Get Gemini analysis
print("\n=== Detailed Gemini Analysis ===")
gemini_result = get_gemini_analysis(news_text)
print(gemini_result)
if __name__ == "__main__":
main()