import torch
import scipy.special
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load the sentiment model and tokenizer (the checkpoint is a RoBERTa-based Twitter sentiment model)
finbert_ckpt = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(finbert_ckpt)
model_finbert = AutoModelForSequenceClassification.from_pretrained(finbert_ckpt).to("cuda" if torch.cuda.is_available() else "cpu")
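
# Optional sanity check (a minimal sketch, not part of the original script): the
# label-to-sentiment mapping in analyze_sentiment() below assumes this checkpoint's
# id2label convention (LABEL_0 = negative, LABEL_1 = neutral, LABEL_2 = positive).
print("Model labels:", model_finbert.config.id2label)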
def analyze_sentiment(text_list):
    """Performs sentiment analysis on a list of texts using the loaded sentiment model."""
    preds = []
    preds_proba = []
    tokenizer_kwargs = {"padding": True, "truncation": True, "max_length": 512}
    for text in text_list:
        with torch.no_grad():
            # Tokenize the input and move it to the model's device
            input_sequence = tokenizer(text, return_tensors="pt", **tokenizer_kwargs).to(model_finbert.device)
            logits = model_finbert(**input_sequence).logits.cpu().numpy().squeeze()
        # Convert logits to probabilities and pair them with their label names
        scores = {
            k: v for k, v in zip(
                model_finbert.config.id2label.values(),
                scipy.special.softmax(logits)
            )
        }
        # Get the most probable sentiment and its probability
        sentiment = max(scores, key=scores.get)
        probability = max(scores.values())
        # Map the raw label names to human-readable sentiments
        if sentiment == 'LABEL_2':
            sentiment = 'positive'
        elif sentiment == 'LABEL_0':
            sentiment = 'negative'
        else:
            sentiment = 'neutral'
        preds.append(sentiment)
        preds_proba.append(probability)
    # Return a DataFrame with the results
    df_results = pd.DataFrame({
        "Text": text_list,
        "Predicted Sentiment": preds,
        "Probability": preds_proba
    })
    return df_results
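
# Usage sketch (illustrative only; the example texts below are assumptions, not part
# of the original script): run the analyzer on a few short headlines and print the
# resulting DataFrame of predicted sentiments and probabilities.
if __name__ == "__main__":
    sample_texts = [
        "The company reported record quarterly earnings.",
        "Shares plunged after the profit warning.",
        "The board meets next Tuesday.",
    ]
    print(analyze_sentiment(sample_texts))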