Spaces:

ya02
/

roberta_sentiment

Sleeping

App Files Files Community

ya02 committed on Aug 19, 2024

Commit

7602e43

verified ·

1 Parent(s): 3f205c7

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -52

app.py CHANGED Viewed

@@ -1,77 +1,47 @@
 import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
 import nltk
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from scipy.special import softmax
-from tqdm.notebook import tqdm
 import gradio as gr
-# Fetch the NLTK resources this script requests at startup.
-for resource in ('punkt', 'averaged_perceptron_tagger'):
-    nltk.download(resource)
-# Load the tokenizer and the sequence-classification model from one checkpoint name.
-MODEL_NAME = 'cardiffnlp/twitter-roberta-base-sentiment'
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
-def polarity_scores_roberta(example):
-    """Return RoBERTa sentiment probabilities for one piece of text.
-
-    Args:
-        example: The text to score.
-
-    Returns:
-        A dict with the keys 'roberta_neg', 'roberta_neu' and 'roberta_pos',
-        holding the softmax of the model's first three output logits in
-        that order.
-    """
-    # Cap the input length: without truncation a long review produces more
-    # tokens than the model accepts and the forward pass raises.
-    encoded_text = tokenizer(example, return_tensors='pt', truncation=True, max_length=512)
     output = model(**encoded_text)
     scores = output[0][0].detach().numpy()
     scores = softmax(scores)
     scores_dict = {
-        'roberta_neg': scores[0],
-        'roberta_neu': scores[1],
-        'roberta_pos': scores[2]
     }
     return scores_dict
-def _top_text(results_df, stars, column):
-    """Return the 'Text' of the row with `stars` Score and the highest `column`, or '' if none."""
-    subset = results_df[results_df['Score'] == stars].sort_values(column, ascending=False)
-    # An empty subset used to raise IndexError on .values[0]; report no text instead.
-    return subset['Text'].values[0] if len(subset) else ''
-
-
-def analyze_reviews(csv_file="Reviews.csv", limit=200):
-    """Score the first reviews of a CSV file with RoBERTa and summarise them.
-
-    Args:
-        csv_file: Path of the CSV to read. It must provide the 'Id', 'Text'
-            and 'Score' columns used below. Defaults to "Reviews.csv".
-        limit: Number of leading rows to process (default 200, to keep the
-            run fast).
-
-    Returns:
-        A 4-tuple of: the path of the saved score-distribution plot, the
-        first rows of the merged results as an HTML table, the text of the
-        1-star review with the highest 'roberta_pos', and the text of the
-        5-star review with the highest 'roberta_neg'. Either text is '' when
-        no review with that score was processed.
-    """
-    df = pd.read_csv(csv_file).head(limit)
-    # Plot the distribution of review scores
-    ax = df['Score'].value_counts().sort_index().plot(kind='bar', title='Count of Reviews by Stars', figsize=(10, 5))
-    ax.set_xlabel('Review Stars')
-    plt.tight_layout()
-    plt.savefig('review_distribution.png')
-    # Release the figure so repeated calls do not accumulate open figures.
-    plt.close()
-    res = {}
-    for _, row in tqdm(df.iterrows(), total=len(df)):
-        myid = row['Id']
-        try:
-            res[myid] = polarity_scores_roberta(row['Text'])
-        except RuntimeError:
-            # Best effort: skip reviews the model cannot process.
-            print(f'Broke for id {myid}')
-    results_df = pd.DataFrame(res).T
-    results_df = results_df.reset_index().rename(columns={'index': 'Id'})
-    results_df = results_df.merge(df, how='left')
-    # Return the plot path, a preview table and the two most surprising reviews
-    return (
-        'review_distribution.png',
-        results_df.head().to_html(),
-        _top_text(results_df, 1, 'roberta_pos'),
-        _top_text(results_df, 5, 'roberta_neg'),
-    )
-# Callback used by the Gradio interface
-def gradio_interface():
-    """Run the review analysis and return its four results for the Gradio outputs.
-
-    Returns:
-        The tuple produced by analyze_reviews(): plot path, HTML table, and
-        the two selected review texts.
-    """
-    # Call analyze_reviews() without arguments so it reads its own default CSV path.
-    return analyze_reviews()
 gr.Interface(
-    fn=gradio_interface,
-    inputs=None,  # or simply remove this line
-    outputs=[
-        gr.Image(type="filepath", label="Review Distribution"),
-        gr.HTML(label="Sample Results DataFrame"),
-        gr.Textbox(label="Most Positive 1-Star Review"),
-        gr.Textbox(label="Most Negative 5-Star Review"),
-    ],
     title="Review Sentiment Analysis with RoBERTa",
-    description="Analyze sentiments in a preloaded CSV file of reviews using a RoBERTa model.",
-).launch()

 import pandas as pd
 import numpy as np
 import nltk
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from scipy.special import softmax
 import gradio as gr
+# Fetch the NLTK resources this script requests at startup.
+for resource in ('punkt', 'averaged_perceptron_tagger'):
+    nltk.download(resource)
+# Load the RoBERTa tokenizer and sequence-classification model from one checkpoint name.
+MODEL_NAME = 'cardiffnlp/twitter-roberta-base-sentiment'
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+def polarity_scores_roberta(review_text):
+    """Return RoBERTa sentiment probabilities for one review.
+
+    Args:
+        review_text: The review text to score.
+
+    Returns:
+        A dict with the keys 'Negative', 'Neutral' and 'Positive', holding
+        the softmax of the model's first three output logits in that order,
+        as plain Python floats.
+    """
+    # The model's own tokenizer is the only tokenization needed here.
+    # Cap the input length: without truncation a long review produces more
+    # tokens than the model accepts and the forward pass raises.
+    encoded_text = tokenizer(review_text, return_tensors='pt', truncation=True, max_length=512)
     output = model(**encoded_text)
     scores = output[0][0].detach().numpy()
     scores = softmax(scores)
     scores_dict = {
+        'Negative': float(scores[0]),
+        'Neutral': float(scores[1]),
+        'Positive': float(scores[2])
     }
     return scores_dict
+def analyze_review(review_text):
+    """Gradio callback: classify one review and format the result as text.
+
+    Args:
+        review_text: The text typed into the input box.
+
+    Returns:
+        A message naming the highest-scoring sentiment followed by the three
+        scores rounded to two decimals, or a prompt to enter text when the
+        input is empty or only whitespace.
+    """
+    # Nothing to classify: do not report a sentiment for empty input.
+    if not review_text or not review_text.strip():
+        return "Please enter a review to analyze."
+    scores = polarity_scores_roberta(review_text)
+    # The predicted sentiment is the label with the largest score.
+    sentiment = max(scores, key=scores.get)
+    return (
+        f"The sentiment is {sentiment}.\n\nScores:\n"
+        f"- Negative: {scores['Negative']:.2f}\n"
+        f"- Neutral: {scores['Neutral']:.2f}\n"
+        f"- Positive: {scores['Positive']:.2f}"
+    )
+# Build the text-in / text-out interface around analyze_review, then start serving it.
+review_input = gr.Textbox(lines=5, placeholder="Enter your review here...")
+demo = gr.Interface(
+    fn=analyze_review,
+    inputs=review_input,
+    outputs=gr.Textbox(),
     title="Review Sentiment Analysis with RoBERTa",
+    description="Enter a review and get the sentiment analysis using a RoBERTa model.",
+)
+demo.launch()