"""Fake-news classifier: TF-IDF features fed to a small feed-forward network.

This part of the script loads the labelled CSV files, cleans the article
text, builds TF-IDF features, wraps them in PyTorch datasets/loaders and
constructs the model, loss and optimizer used further down.
"""

import re

import gradio as gr
import nltk
import numpy as np
import pandas as pd
import torch
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

# --- Data loading -----------------------------------------------------------
# Label convention used throughout the script: 1 = true news, 0 = fake news.
true_news = pd.read_csv('True.csv')
fake_news = pd.read_csv('Fake.csv')
true_news['label'] = 1
fake_news['label'] = 0
df = pd.concat([true_news, fake_news], ignore_index=True)

# --- Text cleaning ----------------------------------------------------------
nltk.download('stopwords')

# Built once at import time: rebuilding the stop-word set on every call is
# needlessly expensive when the function is applied to every row.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')
_STOP_WORDS = frozenset(stopwords.words('english'))


def preprocess_text(text: str) -> str:
    """Return *text* normalised for TF-IDF vectorisation.

    Steps, in order: drop every character that is not an ASCII letter or
    whitespace, lowercase, then remove NLTK English stop words. Tokens are
    re-joined with single spaces.

    Args:
        text: Raw article text. Non-string values (for example the NaN that
            pandas produces for empty CSV cells) are treated as empty text
            instead of raising ``TypeError``.

    Returns:
        The cleaned, space-separated text; ``''`` when nothing remains.
    """
    if not isinstance(text, str):
        return ''
    lowered = _NON_LETTER_RE.sub('', text).lower()
    return ' '.join(word for word in lowered.split() if word not in _STOP_WORDS)


df['cleaned_text'] = df['text'].apply(preprocess_text)
y = df['label'].values

# --- Feature extraction -----------------------------------------------------
# Split BEFORE fitting the vectoriser so the vocabulary and IDF weights are
# learned from training rows only; fitting on the full corpus leaks test-set
# statistics into the features and inflates the reported accuracy.
train_text, test_text, y_train, y_test = train_test_split(
    df['cleaned_text'], y, test_size=0.2, random_state=42
)

# float32 matches the tensor dtype used below and halves the memory of the
# dense matrices compared with the float64 default.
vectorizer = TfidfVectorizer(max_features=5000, dtype=np.float32)
X_train = vectorizer.fit_transform(train_text).toarray()
X_test = vectorizer.transform(test_text).toarray()


# --- Dataset / loaders ------------------------------------------------------
class NewsDataset(Dataset):
    """In-memory dataset pairing dense feature rows with integer labels."""

    def __init__(self, X, y):
        """Store *X* as a float32 tensor and *y* as an int64 tensor.

        Args:
            X: Array-like of per-sample feature rows.
            y: Array-like of integer class labels, one per row of *X*.
        """
        # as_tensor avoids a second copy when the input already has the
        # requested dtype.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position *idx*."""
        return self.X[idx], self.y[idx]


train_dataset = NewsDataset(X_train, y_train)
test_dataset = NewsDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


# --- Model ------------------------------------------------------------------
class FakeNewsDetector(torch.nn.Module):
    """Three-layer perceptron: input_dim -> 64 -> 16 -> 2 with ReLU between."""

    def __init__(self, input_dim):
        """Create the layers.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()
        self.fc1 = torch.nn.Linear(input_dim, 64)
        self.fc2 = torch.nn.Linear(64, 16)
        self.fc3 = torch.nn.Linear(16, 2)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return raw two-class logits for the batch *x*.

        No softmax is applied here; ``CrossEntropyLoss`` below expects
        unnormalised logits.
        """
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)


model = FakeNewsDetector(X_train.shape[1])
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# --- Training ---------------------------------------------------------------
num_epochs = 10
for epoch in range(num_epochs):
    # One optimisation pass over the training batches.
    model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    # Evaluation: accuracy on the held-out loader after every epoch.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            # argmax over the class dimension; no need for the discouraged
            # `.data` accessor since gradients are already disabled here.
            predicted = model(batch_X).argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Accuracy: {accuracy:.2f}%')


# --- Inference --------------------------------------------------------------
def predict_fake_news(text: str) -> str:
    """Classify a piece of news text with the trained model.

    The text goes through the same cleaning and TF-IDF transform that were
    used for training, then through the network in evaluation mode.

    Args:
        text: Headline or article body to classify.

    Returns:
        ``"Fake News"`` when the predicted class index is 0, otherwise
        ``"True News"``.
    """
    cleaned = preprocess_text(text)
    features = vectorizer.transform([cleaned]).toarray()
    tensor = torch.as_tensor(features, dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        predicted = model(tensor).argmax(dim=1)
    return "Fake News" if predicted.item() == 0 else "True News"


# Example usage
example_text = "Scientists discover new planet capable of supporting life"
prediction = predict_fake_news(example_text)
print(f"Prediction: {prediction}")


# --- Gradio Interface -------------------------------------------------------
def gradio_interface(text: str) -> str:
    """Gradio callback: return the predicted label for *text*."""
    return predict_fake_news(text)


iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Fake News Detector",
    description="Enter a news headline or text to predict whether it is Fake News or True News.",
)

if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link; confirm
    # that exposing the demo outside the local machine is intended.
    iface.launch(share=True)