# CIS5190-PROJ/BERTv3 · Hugging Face

import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from safetensors.torch import load_file

def evaluate(test_data, batch_size=32):
    """Classify the titles in *test_data* with the CIS5190-PROJ/BERTv3 checkpoint.

    The tokenizer and sequence-classification model are loaded from the
    Hugging Face Hub on every call, moved to CUDA when available (CPU
    otherwise), and run in inference mode over mini-batches so that memory
    use does not grow with the size of the test set.

    Args:
        test_data: Object whose ``test_data['title']`` supports ``.tolist()``
            and yields the texts to classify (presumably a pandas DataFrame
            with a string ``title`` column -- confirm against the caller).
        batch_size: Number of titles per forward pass. Must be >= 1.

    Returns:
        A NumPy integer array with one entry per title, holding
        ``1 - argmax(logits)``. For a two-class model this swaps labels
        0 and 1.
        NOTE(review): the flip presumably maps the checkpoint's label ids
        onto the label convention the evaluation expects -- confirm.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    test_texts = test_data['title'].tolist()
    if not test_texts:
        # Nothing to classify: skip the (expensive) checkpoint load and
        # return an empty array with the same integer dtype as argmax output.
        return torch.empty(0, dtype=torch.long).numpy()

    tokenizer = BertTokenizer.from_pretrained("CIS5190-PROJ/BERTv3")
    model = BertForSequenceClassification.from_pretrained("CIS5190-PROJ/BERTv3")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    batch_predictions = []
    with torch.no_grad():
        for start in range(0, len(test_texts), batch_size):
            batch_texts = test_texts[start:start + batch_size]
            # Every batch is padded/truncated to the same fixed length (64),
            # so per-sample inputs do not depend on how the data is batched.
            encodings = tokenizer(
                batch_texts,
                truncation=True,
                padding="max_length",
                max_length=64,
                return_tensors="pt",
            )
            encodings = {key: val.to(device) for key, val in encodings.items()}
            logits = model(**encodings).logits
            # Move results off the device right away to free accelerator memory.
            batch_predictions.append(torch.argmax(logits, dim=1).cpu())

    predictions = torch.cat(batch_predictions).numpy()
    return 1 - predictions