# File size: 3,805 Bytes
# c44809b e400ad2 c44809b
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from torch.utils.data import Dataset, DataLoader
import gradio as gr
# Load the two halves of the corpus and tag each row with its class:
# rows from True.csv get label 1, rows from Fake.csv get label 0.
true_news = pd.read_csv('True.csv').assign(label=1)
fake_news = pd.read_csv('Fake.csv').assign(label=0)

# Stack both frames into one table with a fresh 0..n-1 index.
df = pd.concat((true_news, fake_news), ignore_index=True)
import re
import nltk
from nltk.corpus import stopwords
# Fetch the NLTK stop-word corpus only when it is not installed yet, so that
# repeated runs (and offline runs with the corpus already on disk) do not
# have to contact the download server.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
# Matches every character that is neither an ASCII letter nor whitespace.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

# Filled on first use so the stop-word corpus is loaded once, not per document.
_STOP_WORD_CACHE = {}


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it on the first call only."""
    if 'english' not in _STOP_WORD_CACHE:
        _STOP_WORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOP_WORD_CACHE['english']


def preprocess_text(text, stop_words=None):
    """Normalise one document for TF-IDF vectorisation.

    Steps, in order: drop every character that is not an ASCII letter or
    whitespace, lowercase the result, then remove stop words and re-join the
    remaining tokens with single spaces.

    Args:
        text: The raw document. ``None`` and float NaN (how pandas represents
            a missing cell) are treated as an empty document; any other
            non-string value is converted with ``str()``.
        stop_words: Optional container of lowercase words to remove. When
            omitted, the NLTK English stop-word list is used.

    Returns:
        The cleaned text as a single space-separated string (possibly empty).
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)
    if stop_words is None:
        stop_words = _english_stop_words()

    lowered = _NON_LETTER_RE.sub('', text).lower()
    return ' '.join(word for word in lowered.split() if word not in stop_words)
# Clean every article once; the cleaned column is what gets vectorised.
df['cleaned_text'] = df['text'].apply(preprocess_text)
y = df['label'].values

# Split the documents BEFORE fitting the vectorizer. Fitting TF-IDF on the
# whole corpus would let test-set vocabulary and document frequencies leak
# into the training features and inflate the reported accuracy.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_text'], y, test_size=0.2, random_state=42
)

# float32 matches the dtype the model consumes and halves the size of the
# dense matrices compared with the float64 default.
vectorizer = TfidfVectorizer(max_features=5000, dtype=np.float32)
X_train = vectorizer.fit_transform(text_train).toarray()  # fit on train only
X_test = vectorizer.transform(text_test).toarray()        # reuse train vocabulary
# NOTE: the dense full-corpus matrix `X` is no longer materialised; nothing
# in this script reads it after the split.
class NewsDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Args:
        X: Array-like of feature rows; stored as a float32 tensor.
        y: Array-like of class indices; stored as an int64 tensor.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # as_tensor converts to the requested dtype and skips the copy when
        # the input already has it.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.long)
        # Fail early: a mismatch would otherwise surface later as an
        # IndexError or as silently ignored rows.
        if self.X.shape[:1] != self.y.shape[:1]:
            raise ValueError(
                f"X and y must contain the same number of samples, "
                f"got {tuple(self.X.shape)} and {tuple(self.y.shape)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]
# Wrap each split in a NewsDataset (train first, then test).
train_dataset, test_dataset = (
    NewsDataset(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

# Mini-batches of 64; only the training data is reshuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)
class FakeNewsDetector(torch.nn.Module):
    """Three-layer feed-forward classifier: input_dim -> 64 -> 16 -> 2.

    ReLU follows the first two linear layers; the last layer returns raw
    two-class scores with no activation applied.
    """

    def __init__(self, input_dim):
        super().__init__()
        nn = torch.nn
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 16)
        self.fc3 = nn.Linear(16, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the two-class scores for the batch ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.relu(layer(hidden))
        return self.fc3(hidden)
# Model, loss and optimiser; the input width is the number of feature columns.
model = FakeNewsDetector(X_train.shape[1])
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

for epoch in range(num_epochs):
    # Training pass: one optimiser step per mini-batch.
    model.train()
    for features, targets in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(features), targets)
        loss.backward()
        optimizer.step()

    # Evaluation pass: count correct predictions on the test loader.
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for features, targets in test_loader:
            predicted = model(features).max(dim=1).indices
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Accuracy: {accuracy:.2f}%')
def predict_fake_news(text):
    """Classify one piece of text with the trained model.

    The text is cleaned with ``preprocess_text``, turned into a feature row
    by the fitted ``vectorizer`` and passed through ``model`` in eval mode.

    Returns:
        "Fake News" when the highest-scoring class is 0, otherwise "True News".
    """
    features = vectorizer.transform([preprocess_text(text)]).toarray()
    batch = torch.FloatTensor(features)

    model.eval()
    with torch.no_grad():
        scores = model(batch)

    class_index = scores.max(dim=1).indices.item()
    if class_index == 0:
        return "Fake News"
    return "True News"
# Smoke test: classify one hard-coded headline and print the verdict.
example_text = "Scientists discover new planet capable of supporting life"
prediction = predict_fake_news(text=example_text)
print(f"Prediction: {prediction}")
# Gradio Interface
def gradio_interface(text):
    """Gradio callback: return the label ``predict_fake_news`` assigns to ``text``."""
    return predict_fake_news(text)
# Text-in / text-out web UI around the classifier callback.
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Fake News Detector",
    description="Enter a news headline or text to predict whether it is Fake News or True News.",
)

if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable link in
    # addition to the local server -- confirm that exposure is intended.
    iface.launch(share=True)