Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,77 +1,47 @@
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
3 |
-
import matplotlib.pyplot as plt
|
4 |
-
import seaborn as sns
|
5 |
import nltk
|
6 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
7 |
from scipy.special import softmax
|
8 |
-
from tqdm.notebook import tqdm
|
9 |
import gradio as gr
|
10 |
|
11 |
-
# Download NLTK resources
|
12 |
nltk.download('punkt')
|
13 |
nltk.download('averaged_perceptron_tagger')
|
14 |
|
15 |
-
# Load the
|
16 |
tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
|
17 |
model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
|
18 |
|
19 |
-
#
|
20 |
-
def polarity_scores_roberta(
|
21 |
-
|
|
|
22 |
output = model(**encoded_text)
|
23 |
scores = output[0][0].detach().numpy()
|
24 |
scores = softmax(scores)
|
25 |
scores_dict = {
|
26 |
-
'
|
27 |
-
'
|
28 |
-
'
|
29 |
}
|
30 |
return scores_dict
|
31 |
|
32 |
-
#
|
33 |
-
def
|
34 |
-
|
35 |
-
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
ax.set_xlabel('Review Stars')
|
40 |
-
plt.tight_layout()
|
41 |
-
plt.savefig('review_distribution.png')
|
42 |
|
43 |
-
|
44 |
-
for i, row in tqdm(df.iterrows(), total=len(df)):
|
45 |
-
try:
|
46 |
-
text = row['Text']
|
47 |
-
myid = row['Id']
|
48 |
-
roberta_result = polarity_scores_roberta(text)
|
49 |
-
res[myid] = roberta_result
|
50 |
-
except RuntimeError:
|
51 |
-
print(f'Broke for id {myid}')
|
52 |
-
|
53 |
-
results_df = pd.DataFrame(res).T
|
54 |
-
results_df = results_df.reset_index().rename(columns={'index': 'Id'})
|
55 |
-
results_df = results_df.merge(df, how='left')
|
56 |
-
|
57 |
-
# Return plots and the DataFrame with results
|
58 |
-
return 'review_distribution.png', results_df.head().to_html(), results_df.query('Score == 1').sort_values('roberta_pos', ascending=False)['Text'].values[0], results_df.query('Score == 5').sort_values('roberta_neg', ascending=False)['Text'].values[0]
|
59 |
-
|
60 |
-
# Define the Gradio interface
|
61 |
-
def gradio_interface():
|
62 |
-
csv_file = "sample_data/Reviews.csv" # Replace with the path to your CSV file
|
63 |
-
plot_path, table_html, pos_review, neg_review = analyze_reviews(csv_file)
|
64 |
-
return plot_path, table_html, pos_review, neg_review
|
65 |
|
|
|
66 |
gr.Interface(
|
67 |
-
fn=
|
68 |
-
inputs=
|
69 |
-
outputs=
|
70 |
-
gr.Image(type="filepath", label="Review Distribution"),
|
71 |
-
gr.HTML(label="Sample Results DataFrame"),
|
72 |
-
gr.Textbox(label="Most Positive 1-Star Review"),
|
73 |
-
gr.Textbox(label="Most Negative 5-Star Review"),
|
74 |
-
],
|
75 |
title="Review Sentiment Analysis with RoBERTa",
|
76 |
-
description="
|
77 |
-
).launch()
|
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
|
|
|
|
3 |
import nltk
|
4 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
5 |
from scipy.special import softmax
|
|
|
6 |
import gradio as gr
|
7 |
|
8 |
+
# Download necessary NLTK resources
for _resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(_resource)

# Load the RoBERTa tokenizer and model from one shared checkpoint id so the
# two can never drift apart.
_MODEL_ID = 'cardiffnlp/twitter-roberta-base-sentiment'
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)
|
15 |
|
16 |
+
# Function to calculate polarity scores using RoBERTa
|
17 |
+
def polarity_scores_roberta(review_text, max_length=512):
    """Return softmax sentiment probabilities for a single piece of text.

    The text is encoded with the module-level ``tokenizer``, passed through
    the module-level ``model``, and the three output logits are normalised
    with ``softmax``.

    Args:
        review_text: The text to classify.
        max_length: Upper bound on the number of tokens sent to the model.
            Longer inputs are truncated instead of being passed through
            at full length.  The default of 512 assumes a RoBERTa-base
            checkpoint -- confirm if the model is swapped.

    Returns:
        A dict with the keys ``'Negative'``, ``'Neutral'`` and
        ``'Positive'`` mapped to plain Python floats, taken from logit
        indices 0, 1 and 2 respectively (presumably the checkpoint's label
        order -- verify against the model card).
    """
    # Truncate explicitly: without it, long reviews reach the model
    # untruncated and can fail inside the forward pass.
    encoded_text = tokenizer(
        review_text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    output = model(**encoded_text)

    # output[0] holds the logits; [0] selects the only item in the batch.
    scores = softmax(output[0][0].detach().numpy())

    return {
        'Negative': float(scores[0]),
        'Neutral': float(scores[1]),
        'Positive': float(scores[2]),
    }
|
29 |
|
30 |
+
# Gradio interface function
|
31 |
+
def analyze_review(review_text):
    """Format the sentiment analysis of one review as display text.

    Args:
        review_text: The text to analyse.  ``None``, an empty string, or a
            whitespace-only string is treated as "no input".

    Returns:
        A prompt asking for input when ``review_text`` is blank; otherwise
        a multi-line string naming the highest-scoring sentiment followed
        by the Negative / Neutral / Positive scores to two decimals.
    """
    # Guard clause: scoring blank input would report a sentiment for text
    # the user never entered.
    if review_text is None or not review_text.strip():
        return "Please enter a review to analyze."

    scores = polarity_scores_roberta(review_text)

    # The label whose score is largest is the predicted sentiment.
    sentiment = max(scores, key=scores.get)

    return (
        f"The sentiment is {sentiment}.\n\n"
        "Scores:\n"
        f"- Negative: {scores['Negative']:.2f}\n"
        f"- Neutral: {scores['Neutral']:.2f}\n"
        f"- Positive: {scores['Positive']:.2f}"
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
+
# Gradio Interface
|
41 |
# Build the one-textbox-in, one-textbox-out UI first, then start serving it.
demo = gr.Interface(
    fn=analyze_review,
    inputs=gr.Textbox(lines=5, placeholder="Enter your review here..."),
    outputs=gr.Textbox(),
    title="Review Sentiment Analysis with RoBERTa",
    description="Enter a review and get the sentiment analysis using a RoBERTa model.",
)
demo.launch()
|