import torch
import scipy.special
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load the sentiment model and tokenizer.
# Note: this checkpoint is CardiffNLP's Twitter RoBERTa sentiment model, not FinBERT.
model_ckpt = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
device = "cuda" if torch.cuda.is_available() else "cpu"
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_ckpt).to(device)
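# This checkpoint does not ship human-readable label names: config.id2label is expected
# to expose the generic LABEL_0 / LABEL_1 / LABEL_2 ids, which the model card documents
# as negative / neutral / positive. analyze_sentiment() below maps them accordingly.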
def analyze_sentiment(text_list):
    """Run sentiment analysis on each text in text_list and return a DataFrame of results."""
    preds = []
    preds_proba = []
    tokenizer_kwargs = {"padding": True, "truncation": True, "max_length": 512}
    for text in text_list:
        with torch.no_grad():
            # Tokenize the input and move it to the model's device
            input_sequence = tokenizer(text, return_tensors="pt", **tokenizer_kwargs).to(sentiment_model.device)
            logits = sentiment_model(**input_sequence).logits.cpu().numpy().squeeze()
        # Convert logits to probabilities, keyed by the model's raw label names
        scores = dict(zip(sentiment_model.config.id2label.values(), scipy.special.softmax(logits)))
        # Pick the most probable label and its probability
        sentiment = max(scores, key=scores.get)
        probability = max(scores.values())
        # Map the raw label ids to human-readable sentiments
        if sentiment == "LABEL_2":
            sentiment = "positive"
        elif sentiment == "LABEL_0":
            sentiment = "negative"
        else:
            sentiment = "neutral"
        preds.append(sentiment)
        preds_proba.append(probability)
    # Return a DataFrame with one row per input text
    df_results = pd.DataFrame({
        "Text": text_list,
        "Predicted Sentiment": preds,
        "Probability": preds_proba,
    })
    return df_results
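
# Minimal usage sketch (assumption: the file is run directly as a script). The sample
# strings below are illustrative placeholders, not data from the original project.
if __name__ == "__main__":
    sample_texts = [
        "Shares surged after the company beat quarterly earnings estimates.",
        "The stock plunged following news of a regulatory investigation.",
        "The board meeting is scheduled for next Tuesday.",
    ]
    # Prints a DataFrame with columns: Text, Predicted Sentiment, Probability
    print(analyze_sentiment(sample_texts))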