# Sentiment-analysis script using Hugging Face Transformers + PyTorch.
import torch
import scipy.special
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load the sentiment checkpoint and its tokenizer.
# NOTE(review): despite the "finbert" variable names, this checkpoint is
# cardiffnlp's Twitter RoBERTa sentiment model, NOT FinBERT — consider renaming.
finbert_ckpt = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(finbert_ckpt)
# Place the model on GPU when available, otherwise fall back to CPU.
model_finbert = AutoModelForSequenceClassification.from_pretrained(finbert_ckpt).to("cuda" if torch.cuda.is_available() else "cpu")
def analyze_sentiment(text_list):
    """Classify the sentiment of each text in ``text_list``.

    Uses the module-level ``tokenizer`` and ``model_finbert`` (the
    "cardiffnlp/twitter-roberta-base-sentiment" RoBERTa checkpoint,
    despite the FinBERT-style naming).

    Args:
        text_list: iterable of strings to classify.

    Returns:
        pandas.DataFrame with columns:
          - "Text": the input texts,
          - "Predicted Sentiment": 'positive' / 'negative' / 'neutral',
          - "Probability": softmax probability of the predicted label.
    """
    # Map the checkpoint's raw label ids to readable names; anything else
    # (i.e. LABEL_1) is treated as neutral, matching the original branching.
    label_map = {"LABEL_2": "positive", "LABEL_0": "negative"}
    tokenizer_kwargs = {"padding": True, "truncation": True, "max_length": 512}
    preds = []
    preds_proba = []
    for text in text_list:
        with torch.no_grad():
            # Tokenize and run the model on the same device it lives on.
            inputs = tokenizer(text, return_tensors="pt", **tokenizer_kwargs).to(model_finbert.device)
            logits = model_finbert(**inputs).logits.cpu().numpy().squeeze()
        # Convert logits to per-label probabilities.
        probabilities = scipy.special.softmax(logits)
        scores = dict(zip(model_finbert.config.id2label.values(), probabilities))
        # Single pass over items: the probability is guaranteed to belong to
        # the chosen label (the original scanned the dict twice).
        raw_label, probability = max(scores.items(), key=lambda kv: kv[1])
        preds.append(label_map.get(raw_label, "neutral"))
        preds_proba.append(probability)
    # Assemble results; an empty text_list yields an empty DataFrame.
    return pd.DataFrame({
        "Text": text_list,
        "Predicted Sentiment": preds,
        "Probability": preds_proba,
    })