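"""Hybrid fake-news classifier (command-line script).

Combines a fine-tuned DeBERTa-v3 sequence classifier loaded from a local
checkpoint with spaCy named-entity extraction and a Gemini-based content
analysis to label user-supplied news text as REAL or FAKE.
"""
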
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import spacy
import google.generativeai as genai
import json
import os
import dotenv
dotenv.load_dotenv()
# Load spaCy for NER
nlp = spacy.load("en_core_web_sm")
# Load the trained ML model
model_path = "./results/checkpoint-753" # Replace with the actual path to your model
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()
def setup_gemini():
    """Configure the Gemini API client and return a GenerativeModel instance."""
    genai.configure(api_key=os.getenv("GEMINI_API"))
    model = genai.GenerativeModel('gemini-pro')
    return model
def predict_with_model(text):
    """Predict whether the news is real or fake using the ML model."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_label = torch.argmax(probabilities, dim=-1).item()
    return "FAKE" if predicted_label == 1 else "REAL"
def extract_entities(text):
    """Extract named entities from text using spaCy."""
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities
def predict_news(text):
    """Return the ML model's REAL/FAKE prediction for the given text."""
    prediction = predict_with_model(text)
    return prediction
def analyze_content_gemini(model, text):
    """Ask Gemini to analyze the news text and return its structured JSON verdict."""
    prompt = f"""Analyze this news text and return a JSON object with the following structure:
    {{
        "gemini_analysis": {{
            "predicted_classification": "Real or Fake",
            "confidence_score": "0-100",
            "reasoning": ["point1", "point2"]
        }},
        "text_classification": {{
            "category": "",
            "writing_style": "Formal/Informal/Clickbait",
            "target_audience": "",
            "content_type": "news/opinion/editorial"
        }},
        "sentiment_analysis": {{
            "primary_emotion": "",
            "emotional_intensity": "1-10",
            "sensationalism_level": "High/Medium/Low",
            "bias_indicators": ["bias1", "bias2"],
            "tone": {{"formality": "formal/informal", "style": "Professional/Emotional/Neutral"}},
            "emotional_triggers": ["trigger1", "trigger2"]
        }},
        "entity_recognition": {{
            "source_credibility": "High/Medium/Low",
            "people": ["person1", "person2"],
            "organizations": ["org1", "org2"],
            "locations": ["location1", "location2"],
            "dates": ["date1", "date2"],
            "statistics": ["stat1", "stat2"]
        }},
        "context": {{
            "main_narrative": "",
            "supporting_elements": ["element1", "element2"],
            "key_claims": ["claim1", "claim2"],
            "narrative_structure": ""
        }},
        "fact_checking": {{
            "verifiable_claims": ["claim1", "claim2"],
            "evidence_present": "Yes/No",
            "fact_check_score": "0-100"
        }}
    }}

    Analyze this text and return only the JSON response: {text}"""

    response = model.generate_content(prompt)

    try:
        # Strip the markdown code fences Gemini often wraps around JSON output
        cleaned_text = response.text.strip()
        if cleaned_text.startswith('```json'):
            cleaned_text = cleaned_text[7:]
        if cleaned_text.endswith('```'):
            cleaned_text = cleaned_text[:-3]
        return json.loads(cleaned_text)
    except json.JSONDecodeError:
        # Fall back to a neutral verdict if the response is not valid JSON
        return {
            "gemini_analysis": {
                "predicted_classification": "UNCERTAIN",
                "confidence_score": "50",
                "reasoning": ["Analysis failed to generate valid JSON"]
            }
        }
def clean_gemini_output(text):
    """Remove markdown formatting from Gemini output."""
    text = text.replace('##', '')
    text = text.replace('**', '')
    return text
def get_gemini_analysis(text):
    """Get detailed content analysis from Gemini."""
    gemini_model = setup_gemini()
    gemini_analysis = analyze_content_gemini(gemini_model, text)
    return gemini_analysis
def main():
    print("Welcome to the News Classifier!")
    print("Enter your news text below. Type 'Exit' to quit.")

    while True:
        news_text = input("\nEnter news text: ")
        if news_text.lower() == 'exit':
            print("Thank you for using the News Classifier!")
            return

        # Get ML prediction
        prediction = predict_news(news_text)
        print(f"\nML Analysis: {prediction}")

        # Get Gemini analysis (pretty-printed JSON)
        print("\n=== Detailed Gemini Analysis ===")
        gemini_result = get_gemini_analysis(news_text)
        print(json.dumps(gemini_result, indent=2))


if __name__ == "__main__":
    main()