|
import pandas as pd |
|
import numpy as np |
|
import torch |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from torch.utils.data import Dataset, DataLoader |
|
import gradio as gr |
|
|
|
# Read the two source files and tag every row with its class id:
# 1 for rows from True.csv, 0 for rows from Fake.csv.
true_news = pd.read_csv('True.csv').assign(label=1)
fake_news = pd.read_csv('Fake.csv').assign(label=0)

# Stack both frames into a single table with a fresh 0..n-1 index.
df = pd.concat((true_news, fake_news), ignore_index=True)
|
|
|
import re |
|
import nltk |
|
from nltk.corpus import stopwords |
|
# Fetch the stop-word corpus only when no local copy can be located, so
# repeated runs do not have to contact the NLTK download server.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
|
|
|
_STOP_WORDS = None  # filled on first use by _english_stop_words()


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, loading them once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_text(text):
    """Normalise raw text before it is handed to the vectorizer.

    The text is stripped of every character that is not an ASCII letter or
    whitespace, lower-cased, split on whitespace, filtered against the
    English stop-word list and re-joined with single spaces.

    Args:
        text: Raw text to clean. Any value that is not a ``str`` (for
            example ``None`` or a float NaN) is treated as empty text.

    Returns:
        The cleaned, lower-cased, space-separated string; ``''`` when
        nothing survives the filtering or the input was not a string.
    """
    # Guard first: re.sub would raise TypeError on non-string input.
    if not isinstance(text, str):
        return ''

    # NOTE(review): punctuation (including apostrophes) is removed before
    # the stop-word lookup, so a contraction is compared without its
    # apostrophe - confirm this matches the entries in the stop-word list.
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = text.lower()

    # The stop-word set is cached; this function runs once per document.
    stop_words = _english_stop_words()
    return ' '.join(word for word in text.split() if word not in stop_words)
|
|
|
# Clean every article. Missing values are replaced by empty strings first
# so that preprocess_text always receives a str.
df['cleaned_text'] = df['text'].fillna('').apply(preprocess_text)

y = df['label'].values

# Split the cleaned text BEFORE fitting TF-IDF: the vocabulary and IDF
# weights must be learned from training rows only, otherwise statistics of
# the held-out rows leak into the features and inflate the test accuracy.
# Seed and test fraction are unchanged.
train_text, test_text, y_train, y_test = train_test_split(
    df['cleaned_text'], y, test_size=0.2, random_state=42
)

# Fit on the training text only; the held-out text is merely transformed.
# The full-corpus matrix `X` is intentionally no longer materialised.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train_text).toarray()
X_test = vectorizer.transform(test_text).toarray()
|
|
|
class NewsDataset(Dataset):
    """Dataset that pairs each feature row with its integer class label.

    Args:
        X: Array-like of features, one row per sample. Stored as a
            float32 tensor in ``self.X``.
        y: Array-like of class indices, one per sample. Stored as an
            int64 tensor in ``self.y``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        # torch.as_tensor with an explicit dtype replaces the legacy
        # FloatTensor/LongTensor constructors; it may reuse the input's
        # memory when no dtype conversion is required.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.long)

        # Fail at construction time rather than with an index error
        # somewhere in the middle of an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X has {len(self.X)} samples but y has {len(self.y)} labels"
            )

    def __len__(self):
        """Return the number of samples (taken from the label tensor)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]
|
|
|
# Wrap both splits so they can be served in mini-batches.
train_dataset, test_dataset = (
    NewsDataset(X_train, y_train),
    NewsDataset(X_test, y_test),
)

# Only the training batches are reshuffled; evaluation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)
|
|
|
class FakeNewsDetector(torch.nn.Module):
    """Fully connected classifier that maps a feature vector to two logits.

    Layer widths are ``input_dim -> 64 -> 16 -> 2``. ReLU follows the first
    two linear layers; the final layer's output is returned unnormalised.
    """

    def __init__(self, input_dim):
        """Create the three linear layers for inputs of width ``input_dim``."""
        super().__init__()
        nn = torch.nn
        self.fc1 = nn.Linear(in_features=input_dim, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=16)
        self.fc3 = nn.Linear(in_features=16, out_features=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw two-class scores for the batch ``x``."""
        hidden = x
        # The two hidden layers share the same activation.
        for layer in (self.fc1, self.fc2):
            hidden = self.relu(layer(hidden))
        return self.fc3(hidden)
|
|
|
# Network sized to the feature matrix, trained with cross-entropy and Adam.
model = FakeNewsDetector(input_dim=X_train.shape[1])
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    # Optimisation pass over the training batches.
    model.train()
    for features, targets in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(features), targets)
        loss.backward()
        optimizer.step()

    # Accuracy on the held-out batches; no gradients are needed here.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for features, targets in test_loader:
            # Index of the larger of the two scores in every row.
            predicted = model(features).max(dim=1).indices
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Accuracy: {accuracy:.2f}%')
|
|
|
def predict_fake_news(text):
    """Classify one piece of text with the module-level model.

    The text is cleaned with ``preprocess_text``, converted to a feature
    row by the fitted ``vectorizer`` and scored by ``model`` in eval mode.

    Args:
        text: The headline or article text to classify.

    Returns:
        ``"Fake News"`` when class 0 has the highest score, otherwise
        ``"True News"``.
    """
    feature_row = vectorizer.transform([preprocess_text(text)]).toarray()
    features = torch.FloatTensor(feature_row)

    model.eval()
    with torch.no_grad():
        logits = model(features)
        # torch.max returns (values, indices); only the index is needed.
        winning_class = torch.max(logits.data, 1)[1].item()

    if winning_class != 0:
        return "True News"
    return "Fake News"
|
|
|
|
|
# Smoke test: classify one hard-coded headline and print the verdict.
example_text = "Scientists discover new planet capable of supporting life"
prediction = predict_fake_news(text=example_text)
print(f"Prediction: {prediction}")
|
|
|
|
|
def gradio_interface(text):
    """Gradio callback: return the label ``predict_fake_news`` gives ``text``."""
    return predict_fake_news(text)
|
|
|
# Web front end: one text input, one text output, backed by the callback
# defined above.
iface = gr.Interface(
    title="Fake News Detector",
    description="Enter a news headline or text to predict whether it is Fake News or True News.",
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)

# Start the UI only when the file is executed as a script.
# NOTE(review): share=True asks Gradio for a share link - confirm that
# exposing the app beyond the local machine is intended.
if __name__ == "__main__":
    iface.launch(share=True)