# Hugging Face Space: jvedsaqib/Bengali_NEWS_DETECT (status: Sleeping)
# File size: 4,160 bytes

import numpy as np
import torch
import requests
from bs4 import BeautifulSoup

# Choose the torch device once at import time: "cuda" when PyTorch reports
# that CUDA is available, otherwise "cpu". The choice is echoed to stdout.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

import numpy as np
import gradio as gr
from transformers import BertTokenizer, AutoTokenizer
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig
import random
# Hub checkpoint that supplies both the tokenizer and the classifier weights.
MODEL_CHECKPOINT = 'armansakif/bengali-fake-news'

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

model = BertForSequenceClassification.from_pretrained(
    MODEL_CHECKPOINT,
    num_labels = 2, # Two output labels: binary classification.
    output_attentions = False, # Do not return attention weights.
    output_hidden_states = False, # Do not return all hidden states.
)
# Keep the weights on the same device that inference inputs are sent to;
# otherwise a CUDA host fails with a CPU/GPU tensor mismatch.
model.to(device)

def extract_mainarea_content(url):
    """Fetch a web page and return the text of its ``<div id="mainArea">``.

    Args:
        url: Address of the page to download.

    Returns:
        The text content of the ``mainArea`` div, or the sentinel string
        ``"ERROR"`` when the server answers with a non-success status or the
        page has no such div.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, invalid URL, ...).
    """
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, timeout=10)
    if not response.ok:
        # Do not treat the body of an HTTP error page as article text.
        return "ERROR"

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")
    mainarea_div = soup.find("div", id="mainArea")
    if mainarea_div is None:
        return "ERROR"
    return mainarea_div.get_text()

def classify_news(news):
    """Classify a piece of news text as fake or authentic.

    If ``news`` contains ``"bangla.hindustantimes"`` it is treated as an
    article link and the article body is scraped with
    ``extract_mainarea_content``; otherwise ``news`` itself is classified.

    Args:
        news: Raw news text, or a Hindustan Times Bangla article URL.

    Returns:
        A dict ``{'fake': p0, 'authentic': p1}`` holding the softmax
        probabilities of class 0 and class 1 as Python floats.

    Raises:
        gr.Error: If ``news`` is a link whose article text could not be
            extracted.
    """
    if "bangla.hindustantimes" in news:
        sent = extract_mainarea_content(news.strip())
        if sent == "ERROR":
            # Classifying the sentinel string would yield a meaningless
            # verdict; surface the failure to the user instead.
            raise gr.Error("Could not extract the article text from the given link.")
    else:
        sent = news

    encoded = tokenizer(
        sent,
        add_special_tokens=True,      # Add '[CLS]' and '[SEP]'.
        max_length=512,               # Pad & truncate to 512 tokens.
        padding="max_length",         # Replaces the deprecated pad_to_max_length.
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",          # PyTorch tensors with a batch dim of 1.
    )

    # Send the inputs to wherever the model's weights actually live so the
    # forward pass never mixes CPU and GPU tensors.
    target_device = model.device
    input_ids = encoded["input_ids"].to(target_device)
    attention_mask = encoded["attention_mask"].to(target_device)

    model.eval()
    with torch.no_grad():
        outputs = model(
            input_ids,
            token_type_ids=None,
            attention_mask=attention_mask,
        )
    # No labels are passed, so no loss is computed and the logits come first.
    logits = outputs[0]

    # Normalise over the class dimension; [0] selects the only sample.
    probs = torch.nn.functional.softmax(logits, dim=1)[0].cpu().tolist()

    print(probs[0])
    print(probs[1])

    # Class index 1 is "authentic", class index 0 is "fake".
    prediction = int(torch.argmax(logits, dim=1)[0])
    result = 'Authentic News' if prediction else 'Fake News'
    print(result)

    labels = ['fake', 'authentic']
    return {label: float(prob) for label, prob in zip(labels, probs)}
# Build the Gradio UI: a 10-line text box feeds classify_news and the
# returned label -> probability dict is shown in a Label component.
_news_box = gr.Textbox(
    lines=10,
    placeholder="News here or Hindustan Times Bangla Article Link",
)
_verdict_label = gr.Label(num_top_classes=2)

demo = gr.Interface(fn=classify_news, inputs=_news_box, outputs=_verdict_label)

# Start the web app as soon as the module is run.
demo.launch(inline=False, share=True)