Spaces:

ya02
/

roberta_sentiment

Sleeping

App Files Files Community

ya02 committed on Aug 19, 2024

Commit

e1c6483

verified ·

1 Parent(s): 7d12038

Create app.py

Browse files

Files changed (1) hide show

app.py +77 -0

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import nltk
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from scipy.special import softmax
+from tqdm.notebook import tqdm
+import gradio as gr
+# Download NLTK resources
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+# Load the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
+model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
+# Define a function to calculate polarity scores using RoBERTa
+def polarity_scores_roberta(example):
+    encoded_text = tokenizer(example, return_tensors='pt')
+    output = model(**encoded_text)
+    scores = output[0][0].detach().numpy()
+    scores = softmax(scores)
+    scores_dict = {
+        'roberta_neg': scores[0],
+        'roberta_neu': scores[1],
+        'roberta_pos': scores[2]
+    }
+    return scores_dict
+# Function to perform the analysis and return results
+def analyze_reviews(csv_file):
+    df = pd.read_csv(csv_file)
+    df = df.head(200)  # Limiting to 200 for faster processing
+    # Plot the distribution of review scores
+    ax = df['Score'].value_counts().sort_index().plot(kind='bar', title='Count of Reviews by Stars', figsize=(10, 5))
+    ax.set_xlabel('Review Stars')
+    plt.tight_layout()
+    plt.savefig('review_distribution.png')
+    res = {}
+    for i, row in tqdm(df.iterrows(), total=len(df)):
+        try:
+            text = row['Text']
+            myid = row['Id']
+            roberta_result = polarity_scores_roberta(text)
+            res[myid] = roberta_result
+        except RuntimeError:
+            print(f'Broke for id {myid}')
+    results_df = pd.DataFrame(res).T
+    results_df = results_df.reset_index().rename(columns={'index': 'Id'})
+    results_df = results_df.merge(df, how='left')
+    # Return plots and the DataFrame with results
+    return 'review_distribution.png', results_df.head().to_html(), results_df.query('Score == 1').sort_values('roberta_pos', ascending=False)['Text'].values[0], results_df.query('Score == 5').sort_values('roberta_neg', ascending=False)['Text'].values[0]
+# Define the Gradio interface
+def gradio_interface():
+    csv_file = "sample_data/Reviews.csv"  # Replace with the path to your CSV file
+    plot_path, table_html, pos_review, neg_review = analyze_reviews(csv_file)
+    return plot_path, table_html, pos_review, neg_review
+gr.Interface(
+    fn=gradio_interface,
+    inputs=None,  # or simply remove this line
+    outputs=[
+        gr.Image(type="filepath", label="Review Distribution"),
+        gr.HTML(label="Sample Results DataFrame"),
+        gr.Textbox(label="Most Positive 1-Star Review"),
+        gr.Textbox(label="Most Negative 5-Star Review"),
+    ],
+    title="Review Sentiment Analysis with RoBERTa",
+    description="Analyze sentiments in a preloaded CSV file of reviews using a RoBERTa model.",
+).launch()