"""Gradio demo that classifies Bengali news as fake or authentic.

The input is either raw news text or a link to a Hindustan Times Bangla
article; for a link, the text of the page's ``mainArea`` div is classified.
"""

import random

import gradio as gr
import numpy as np
import requests
import torch
from bs4 import BeautifulSoup
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset, random_split
# NOTE(review): several of these imports (AdamW, BertConfig, BertTokenizer,
# the torch.utils.data helpers, random) are not used by the code below.
# transformers' AdamW is deprecated in favour of torch.optim.AdamW -- confirm
# nothing else needs these names and drop them.
from transformers import AdamW, BertConfig, BertForSequenceClassification
from transformers import AutoTokenizer, BertTokenizer

if torch.cuda.is_available():
    device = torch.device("cuda")
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')

# Checkpoint identifier handed to ``from_pretrained`` for tokenizer and model.
MODEL_NAME = "armansakif/bengali-fake-news"
# Inputs are padded / truncated to this many tokens.
MAX_SEQUENCE_LENGTH = 512
# A single-token input containing this host is treated as an article link.
ARTICLE_HOST = "bangla.hindustantimes"
# Output index -> human-readable class name (index 1 means authentic).
CLASS_NAMES = ("fake", "authentic")
# Upper bound, in seconds, on how long an article download may block.
REQUEST_TIMEOUT_SECONDS = 10

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,                # Binary classification: fake vs. authentic.
    output_attentions=False,     # Attention weights are not needed.
    output_hidden_states=False,  # Hidden states are not needed.
)
# The inputs are moved to ``device`` before the forward pass, so the model
# must live on the same device or the call fails on CUDA machines.
model.to(device)
# Inference only: switch off dropout once, up front.
model.eval()


def extract_mainarea_content(url, timeout=REQUEST_TIMEOUT_SECONDS):
    """Download *url* and return the text of its ``<div id="mainArea">``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text content of the ``mainArea`` div, or the string ``"ERROR"``
        when the page has no such div.

    Raises:
        requests.RequestException: If the download itself fails.
    """
    response = requests.get(url, timeout=timeout)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")
    mainarea_div = soup.find("div", id="mainArea")
    if mainarea_div is None:
        return "ERROR"
    return mainarea_div.get_text()


def _resolve_news_text(news):
    """Return the text to classify, downloading the article for link input."""
    candidate = news.strip()
    # Only a lone token containing the host is a link; pasted article text
    # that merely mentions the site is classified as-is.
    is_article_link = ARTICLE_HOST in candidate and len(candidate.split()) == 1
    if not is_article_link:
        return news

    try:
        article_text = extract_mainarea_content(candidate)
    except requests.RequestException as err:
        raise gr.Error(f"Could not download the article: {err}") from err
    if article_text == "ERROR":
        # Do not feed the failure sentinel to the classifier as if it were news.
        raise gr.Error("Could not find the article body on that page.")
    return article_text


def classify_news(news):
    """Classify a piece of news as fake or authentic.

    Args:
        news: News text, or a Hindustan Times Bangla article link.

    Returns:
        A dict mapping ``"fake"`` and ``"authentic"`` to their softmax
        probabilities.

    Raises:
        gr.Error: If the input is empty, or a linked article cannot be
            downloaded or has no ``mainArea`` div.
    """
    if not news or not news.strip():
        raise gr.Error(
            "Please enter news text or a Hindustan Times Bangla article link."
        )

    text = _resolve_news_text(news)

    encoded = tokenizer(
        text,
        add_special_tokens=True,         # Add '[CLS]' and '[SEP]'.
        max_length=MAX_SEQUENCE_LENGTH,
        padding="max_length",            # Replaces deprecated pad_to_max_length.
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    with torch.no_grad():
        outputs = model(
            input_ids,
            token_type_ids=None,
            attention_mask=attention_mask,
        )
    # No labels are passed, so no loss is computed and the logits come first.
    logits = outputs[0]

    # Normalise over the class dimension; row 0 is the single input sample.
    probabilities = (
        torch.nn.functional.softmax(logits, dim=1)[0].detach().cpu().numpy()
    )
    print(probabilities[0])
    print(probabilities[1])

    predicted_index = logits.argmax(dim=1)[0].item()
    result = 'Authentic News' if predicted_index else 'Fake News'
    print(result)

    return {
        name: float(probability)
        for name, probability in zip(CLASS_NAMES, probabilities)
    }


demo = gr.Interface(
    fn=classify_news,
    inputs=gr.Textbox(
        lines=10,
        placeholder="News here or Hindustan Times Bangla Article Link",
    ),
    outputs=gr.Label(num_top_classes=2),
)
demo.launch(inline=False, share=True)