import torch
import scipy.special
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the sentiment model and tokenizer once at import time.
# NOTE: despite the "finbert" in the variable names (kept so existing
# references to them keep working), the checkpoint below is a Twitter RoBERTa
# sentiment classifier, not FinBERT.
finbert_ckpt = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(finbert_ckpt)
model_finbert = AutoModelForSequenceClassification.from_pretrained(finbert_ckpt).to(
    "cuda" if torch.cuda.is_available() else "cpu"
)

# Human-readable names for the generic class labels handled by this module.
# Any label not listed here is reported as "neutral".
_LABEL_TO_SENTIMENT = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive",
}


def analyze_sentiment(text_list) -> pd.DataFrame:
    """Classify the sentiment of each text with the loaded RoBERTa model.

    Each text is tokenized (truncated to 512 tokens), run through
    ``model_finbert`` without gradient tracking, and the logits are turned
    into probabilities with a softmax. The most probable class is reported.

    Args:
        text_list: Iterable of strings to classify. A single ``str`` is
            treated as one text rather than being iterated character by
            character.

    Returns:
        A DataFrame with one row per text and the columns ``"Text"``,
        ``"Predicted Sentiment"`` (``"positive"``, ``"negative"`` or
        ``"neutral"``) and ``"Probability"`` (softmax probability of the
        predicted class).
    """
    # Iterating a bare string would classify every character separately.
    if isinstance(text_list, str):
        text_list = [text_list]

    tokenizer_kwargs = {"padding": True, "truncation": True, "max_length": 512}
    id2label = model_finbert.config.id2label

    preds = []
    preds_proba = []

    # Inference only: no gradients are needed for any of the forward passes.
    with torch.no_grad():
        for text in text_list:
            encoded = tokenizer(
                text, return_tensors="pt", **tokenizer_kwargs
            ).to(model_finbert.device)

            # One text per forward pass, so take the single row of logits.
            logits = model_finbert(**encoded).logits.cpu().numpy()[0]
            probs = scipy.special.softmax(logits)

            # Look the label up by class index instead of relying on the
            # iteration order of ``id2label`` matching the logit order.
            best_idx = int(probs.argmax())
            label = id2label[best_idx]

            preds.append(_LABEL_TO_SENTIMENT.get(label, "neutral"))
            preds_proba.append(probs[best_idx])

    return pd.DataFrame(
        {
            "Text": text_list,
            "Predicted Sentiment": preds,
            "Probability": preds_proba,
        }
    )