# Hugging Face Space: ya02/roberta_sentiment
# Status: Sleeping
# File size: 1,688 Bytes

import pandas as pd
import numpy as np
import nltk
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import gradio as gr

# Fetch the NLTK data packages this app relies on (tokenizer models and tagger)
_NLTK_RESOURCES = ('punkt', 'punkt_tab', 'averaged_perceptron_tagger')
for _resource in _NLTK_RESOURCES:
    nltk.download(_resource)

# Load the RoBERTa tokenizer and the sequence-classification model from the
# same pretrained checkpoint so the vocabulary and weights always match.
MODEL_NAME = 'cardiffnlp/twitter-roberta-base-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Function to calculate polarity scores using RoBERTa
def polarity_scores_roberta(review_text):
    """Return the RoBERTa sentiment probabilities for a piece of text.

    Args:
        review_text: The text to classify.

    Returns:
        A dict with the keys 'Negative', 'Neutral' and 'Positive', mapped to
        the softmax of the model's first three output logits, in that order.
    """
    # Truncate explicitly: without it, a review longer than the model's
    # position limit (512 tokens for RoBERTa-base checkpoints) fails inside
    # the model instead of being scored. max_length is spelled out rather
    # than left to the tokenizer config, which may not define a usable limit.
    encoded_text = tokenizer(
        review_text,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    output = model(**encoded_text)

    # output[0] holds the logits; [0] selects the single input in the batch.
    logits = output[0][0].detach().numpy()
    probabilities = softmax(logits)

    return {
        'Negative': probabilities[0],
        'Neutral': probabilities[1],
        'Positive': probabilities[2],
    }

# Gradio interface function
def analyze_review(review_text):
    """Classify a review and format the result for display.

    Args:
        review_text: The review entered by the user.

    Returns:
        A message naming the highest-scoring sentiment followed by the
        Negative/Neutral/Positive scores rounded to two decimals, or a short
        prompt when no text was entered.
    """
    # Nothing to classify: do not run the model on empty or whitespace-only
    # input and present the result as if it were a real sentiment.
    if not review_text or not review_text.strip():
        return "Please enter a review to analyze."

    # Analyze the review
    scores = polarity_scores_roberta(review_text)

    # Determine the sentiment: the label with the highest score
    sentiment = max(scores, key=scores.get)

    return (
        f"The sentiment is {sentiment}.\n\n"
        "Scores:\n"
        f"- Negative: {scores['Negative']:.2f}\n"
        f"- Neutral: {scores['Neutral']:.2f}\n"
        f"- Positive: {scores['Positive']:.2f}"
    )

# Gradio Interface: a multi-line text input wired to analyze_review, with the
# formatted result shown in a plain text box.
review_input = gr.Textbox(lines=5, placeholder="Enter your review here...")
result_output = gr.Textbox()

demo = gr.Interface(
    fn=analyze_review,
    inputs=review_input,
    outputs=result_output,
    title="Review Sentiment Analysis with RoBERTa",
    description="Enter a review and get the sentiment analysis using a RoBERTa model.",
)

# Start serving the app
demo.launch()