import pandas as pd |
import numpy as np |
import torch |
from sklearn.model_selection import train_test_split |
from sklearn.feature_extraction.text import TfidfVectorizer |
from torch.utils.data import Dataset, DataLoader |
import gradio as gr |
# Load both halves of the corpus and tag every row with its class:
# 1 for rows read from True.csv, 0 for rows read from Fake.csv.
true_news = pd.read_csv('True.csv').assign(label=1)
fake_news = pd.read_csv('Fake.csv').assign(label=0)

# Stack the two frames into a single table with a fresh 0..n-1 index.
df = pd.concat((true_news, fake_news), ignore_index=True)
import re |
import nltk |
from nltk.corpus import stopwords |
# Fetch the NLTK stop-word corpus only when it cannot be found locally, so
# repeated (or offline) runs do not contact the download server every time.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
# Compiled once at import time: matches every character that is neither an
# ASCII letter nor whitespace.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

# English stop words, loaded from NLTK on first use and then reused.
_STOP_WORDS = None


def _get_stop_words():
    """Return the NLTK English stop words as a set, loading them only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_text(text):
    """Normalize raw text before it is handed to the vectorizer.

    Steps, in order:
      1. remove every character that is not an ASCII letter or whitespace;
      2. lower-case the result;
      3. split on whitespace and drop NLTK English stop words;
      4. re-join the remaining tokens with single spaces.

    Args:
        text: The raw text. ``None`` and float NaN (what pandas uses for a
            missing cell) are treated as empty text; any other non-string
            value is converted with ``str()`` first.

    Returns:
        The cleaned text as a single space-separated string (possibly empty).
    """
    # Missing values would otherwise raise TypeError inside the regex call.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    text = _NON_LETTER_RE.sub('', str(text))
    text = text.lower()

    # Fetch the cached set once per call instead of rebuilding it from the
    # corpus for every document.
    stop_words = _get_stop_words()
    return ' '.join(word for word in text.split() if word not in stop_words)
# Clean every article once; the cleaned column feeds the vectorizer below.
df['cleaned_text'] = df['text'].apply(preprocess_text)
y = df['label'].values

# Pick the train/test rows BEFORE fitting the vectorizer so that the
# vocabulary and IDF weights are learned from training rows only. Fitting on
# the full corpus would leak test-set statistics into the features.
# The split uses the same test_size and random_state as before.
train_idx, test_idx = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(max_features=5000)
vectorizer.fit(df['cleaned_text'].iloc[train_idx])

# X keeps its meaning (dense TF-IDF matrix for every row of df); the train
# and test matrices are row selections of it.
X = vectorizer.transform(df['cleaned_text']).toarray()
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]
class NewsDataset(Dataset):
    """Dataset that pairs each feature row with its integer class label."""

    def __init__(self, X, y):
        """Store the features as float32 and the labels as int64 tensors."""
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of samples, taken from the label tensor."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label
# Wrap each split in a dataset, then serve it in mini-batches of 64.
# Only the training loader shuffles; the test loader keeps a fixed order.
train_dataset = NewsDataset(X_train, y_train)
test_dataset = NewsDataset(X_test, y_test)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
)
class FakeNewsDetector(torch.nn.Module):
    """Feed-forward classifier: input_dim -> 64 -> 16 -> 2 logits."""

    def __init__(self, input_dim):
        """Create the three linear layers and the shared ReLU activation.

        Args:
            input_dim: Number of features in each input row.
        """
        super().__init__()
        nn = torch.nn
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 16)
        self.fc3 = nn.Linear(16, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the two raw class scores for ``x``; no softmax is applied."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return logits
# Build the classifier sized to the feature matrix, plus its loss and optimizer.
model = FakeNewsDetector(X_train.shape[1])
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

for epoch in range(num_epochs):
    # --- one optimisation pass over the training batches ---
    model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    # --- accuracy on the test batches after this epoch ---
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            outputs = model(batch_X)
            # The predicted class is the index of the larger logit. No
            # `.data` access is needed because autograd is already disabled.
            predicted = outputs.argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

    # Guard against an empty test loader instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total else 0.0
    print(f'Epoch [{epoch+1}/{num_epochs}], Accuracy: {accuracy:.2f}%')
def predict_fake_news(text):
    """Classify one piece of text with the trained model.

    The text is cleaned with ``preprocess_text``, turned into a one-row
    feature matrix by the fitted ``vectorizer``, and scored by ``model`` in
    evaluation mode with gradients disabled.

    Args:
        text: The headline or article text to classify.

    Returns:
        ``"Fake News"`` when the predicted class is 0, else ``"True News"``.
    """
    features = vectorizer.transform([preprocess_text(text)]).toarray()
    batch = torch.FloatTensor(features)

    model.eval()
    with torch.no_grad():
        logits = model(batch)

    predicted_class = logits.argmax(dim=1).item()
    if predicted_class == 0:
        return "Fake News"
    return "True News"
# Smoke test: classify one sample headline and print the verdict.
example_text = (
    "Scientists discover new planet capable of supporting life"
)
prediction = predict_fake_news(example_text)
print(f"Prediction: {prediction}")
def gradio_interface(text):
    """Gradio callback: return the label ``predict_fake_news`` gives ``text``."""
    return predict_fake_news(text)
# Text-in / text-out web form wired to the prediction callback.
iface = gr.Interface(
    title="Fake News Detector",
    description="Enter a news headline or text to predict whether it is Fake News or True News.",
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)

# Start the UI only when this file is executed as a script.
# NOTE(review): share=True presumably requests a publicly reachable link from
# Gradio; confirm that is intended before deploying.
if __name__ == "__main__":
    iface.launch(share=True)