|
import torch |
|
import scipy.special |
|
import pandas as pd |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
|
|
|
|
# NOTE(review): despite the `finbert` naming, this checkpoint is NOT FinBERT —
# it is CardiffNLP's Twitter RoBERTa sentiment model. Renaming the variables
# would be safer but may break other references; confirm before changing.
finbert_ckpt = "cardiffnlp/twitter-roberta-base-sentiment"

# Tokenizer and model are loaded once at import time (network/disk side effect).
tokenizer = AutoTokenizer.from_pretrained(finbert_ckpt)

# Move the model to GPU when available; `model_finbert.device` is read later
# so inputs can be placed on the same device.
model_finbert = AutoModelForSequenceClassification.from_pretrained(finbert_ckpt).to("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
def analyze_sentiment(text_list):
    """Classify the sentiment of each text with the module-level model.

    Note: despite the ``finbert`` naming elsewhere in this file, the loaded
    checkpoint is ``cardiffnlp/twitter-roberta-base-sentiment`` (a Twitter
    RoBERTa model), whose raw labels are LABEL_0=negative, LABEL_1=neutral,
    LABEL_2=positive.

    Parameters
    ----------
    text_list : list[str]
        Texts to classify. An empty list yields an empty DataFrame.

    Returns
    -------
    pandas.DataFrame
        Columns: ``"Text"`` (the input), ``"Predicted Sentiment"``
        ('positive' / 'negative' / 'neutral'), and ``"Probability"``
        (softmax probability of the predicted class).
    """
    # Raw checkpoint labels -> human-readable sentiments. Any label not
    # listed falls back to 'neutral', matching the original else-branch.
    label_map = {'LABEL_2': 'positive', 'LABEL_0': 'negative'}

    tokenizer_kwargs = {"padding": True, "truncation": True, "max_length": 512}

    # Hoist loop-invariant lookups out of the loop.
    id2label_values = model_finbert.config.id2label.values()
    device = model_finbert.device

    preds = []
    preds_proba = []

    for text in text_list:
        # Tokenization needs no gradient context; only the forward pass does.
        encoded = tokenizer(text, return_tensors="pt", **tokenizer_kwargs).to(device)
        with torch.no_grad():
            logits = model_finbert(**encoded).logits.cpu().numpy().squeeze()

        # Per-class probabilities keyed by the checkpoint's label names.
        scores = dict(zip(id2label_values, scipy.special.softmax(logits)))

        # Argmax label; its probability is a direct lookup (no second max pass).
        raw_label = max(scores, key=scores.get)
        preds.append(label_map.get(raw_label, 'neutral'))
        preds_proba.append(scores[raw_label])

    df_results = pd.DataFrame({
        "Text": text_list,
        "Predicted Sentiment": preds,
        "Probability": preds_proba
    })

    return df_results