ya02's picture
Update app.py
1cfa3ed verified
raw
history blame
1.69 kB
import pandas as pd
import numpy as np
import nltk
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import gradio as gr
# Fetch the NLTK data packages, in the same order as they were listed before.
for _nltk_resource in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)

# The tokenizer and the classifier are loaded from the same checkpoint name,
# so spell it once and reuse it for both loads.
_ROBERTA_CHECKPOINT = 'cardiffnlp/twitter-roberta-base-sentiment'
tokenizer = AutoTokenizer.from_pretrained(_ROBERTA_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_ROBERTA_CHECKPOINT)
def polarity_scores_roberta(review_text, max_length=512):
    """Score a piece of text with the module-level RoBERTa sentiment model.

    Args:
        review_text: The text to classify.
        max_length: Upper bound on the number of tokens passed to the model.
            Longer inputs are truncated to this length instead of being
            forwarded as-is.

    Returns:
        A dict with the keys 'Negative', 'Neutral' and 'Positive', each
        mapped to the softmax probability of that class as a Python float.
    """
    # Truncate explicitly with a concrete max_length rather than relying on
    # the tokenizer's configured default, so an over-long review cannot
    # produce a sequence the model is unable to process.
    encoded_text = tokenizer(
        review_text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    output = model(**encoded_text)

    # output[0] holds the logits; a single text was encoded, so take row 0.
    logits = output[0][0].detach().numpy()
    probabilities = softmax(logits)

    # Class indices 0, 1, 2 map to these labels, in this order. Convert to
    # plain floats so callers get ordinary Python numbers, not numpy scalars.
    labels = ('Negative', 'Neutral', 'Positive')
    return {label: float(prob) for label, prob in zip(labels, probabilities)}
def analyze_review(review_text):
    """Produce the text shown in the UI for a submitted review.

    Args:
        review_text: The review as typed by the user. May be empty or None
            when the form is submitted without any input.

    Returns:
        A prompt asking for input when the review is empty or whitespace-only;
        otherwise a message naming the highest-scoring sentiment followed by
        the three class scores formatted to two decimal places.
    """
    # Guard clause: there is nothing meaningful to classify in blank input,
    # so do not run the model and report a sentiment for it.
    if not review_text or not review_text.strip():
        return "Please enter a review to analyze."

    scores = polarity_scores_roberta(review_text)

    # The predicted sentiment is the label with the largest score.
    sentiment = max(scores, key=scores.get)

    return (
        f"The sentiment is {sentiment}.\n\n"
        f"Scores:\n"
        f"- Negative: {scores['Negative']:.2f}\n"
        f"- Neutral: {scores['Neutral']:.2f}\n"
        f"- Positive: {scores['Positive']:.2f}"
    )
# Assemble the Gradio UI piece by piece, then start it.
_review_input = gr.Textbox(lines=5, placeholder="Enter your review here...")
_result_output = gr.Textbox()

demo = gr.Interface(
    fn=analyze_review,
    inputs=_review_input,
    outputs=_result_output,
    title="Review Sentiment Analysis with RoBERTa",
    description="Enter a review and get the sentiment analysis using a RoBERTa model.",
)
demo.launch()