ya02 committed on
Commit
7602e43
·
verified ·
1 Parent(s): 3f205c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -52
app.py CHANGED
@@ -1,77 +1,47 @@
1
  import pandas as pd
2
  import numpy as np
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
  import nltk
6
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  from scipy.special import softmax
8
- from tqdm.notebook import tqdm
9
  import gradio as gr
10
 
11
- # Download NLTK resources
12
  nltk.download('punkt')
13
  nltk.download('averaged_perceptron_tagger')
14
 
15
- # Load the model and tokenizer
16
  tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
17
  model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
18
 
19
- # Define a function to calculate polarity scores using RoBERTa
20
- def polarity_scores_roberta(example):
21
- encoded_text = tokenizer(example, return_tensors='pt')
 
22
  output = model(**encoded_text)
23
  scores = output[0][0].detach().numpy()
24
  scores = softmax(scores)
25
  scores_dict = {
26
- 'roberta_neg': scores[0],
27
- 'roberta_neu': scores[1],
28
- 'roberta_pos': scores[2]
29
  }
30
  return scores_dict
31
 
32
- # Function to perform the analysis and return results
33
- def analyze_reviews():
34
- df = pd.read_csv("Reviews.csv")
35
- df = df.head(200) # Limiting to 200 for faster processing
36
 
37
- # Plot the distribution of review scores
38
- ax = df['Score'].value_counts().sort_index().plot(kind='bar', title='Count of Reviews by Stars', figsize=(10, 5))
39
- ax.set_xlabel('Review Stars')
40
- plt.tight_layout()
41
- plt.savefig('review_distribution.png')
42
 
43
- res = {}
44
- for i, row in tqdm(df.iterrows(), total=len(df)):
45
- try:
46
- text = row['Text']
47
- myid = row['Id']
48
- roberta_result = polarity_scores_roberta(text)
49
- res[myid] = roberta_result
50
- except RuntimeError:
51
- print(f'Broke for id {myid}')
52
-
53
- results_df = pd.DataFrame(res).T
54
- results_df = results_df.reset_index().rename(columns={'index': 'Id'})
55
- results_df = results_df.merge(df, how='left')
56
-
57
- # Return plots and the DataFrame with results
58
- return 'review_distribution.png', results_df.head().to_html(), results_df.query('Score == 1').sort_values('roberta_pos', ascending=False)['Text'].values[0], results_df.query('Score == 5').sort_values('roberta_neg', ascending=False)['Text'].values[0]
59
-
60
- # Define the Gradio interface
61
- def gradio_interface():
62
- csv_file = "sample_data/Reviews.csv" # Replace with the path to your CSV file
63
- plot_path, table_html, pos_review, neg_review = analyze_reviews(csv_file)
64
- return plot_path, table_html, pos_review, neg_review
65
 
 
66
  gr.Interface(
67
- fn=gradio_interface,
68
- inputs=None, # or simply remove this line
69
- outputs=[
70
- gr.Image(type="filepath", label="Review Distribution"),
71
- gr.HTML(label="Sample Results DataFrame"),
72
- gr.Textbox(label="Most Positive 1-Star Review"),
73
- gr.Textbox(label="Most Negative 5-Star Review"),
74
- ],
75
  title="Review Sentiment Analysis with RoBERTa",
76
- description="Analyze sentiments in a preloaded CSV file of reviews using a RoBERTa model.",
77
- ).launch()
 
1
  import pandas as pd
2
  import numpy as np
 
 
3
  import nltk
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
  from scipy.special import softmax
 
6
  import gradio as gr
7
 
8
+ # Download necessary NLTK resources
9
  nltk.download('punkt')
10
  nltk.download('averaged_perceptron_tagger')
11
 
12
+ # Load the RoBERTa tokenizer and model
13
  tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
14
  model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
15
 
16
+ # Function to calculate polarity scores using RoBERTa
17
+ def polarity_scores_roberta(review_text):
18
+ tokens = nltk.word_tokenize(review_text)
19
+ encoded_text = tokenizer(review_text, return_tensors='pt')
20
  output = model(**encoded_text)
21
  scores = output[0][0].detach().numpy()
22
  scores = softmax(scores)
23
  scores_dict = {
24
+ 'Negative': scores[0],
25
+ 'Neutral': scores[1],
26
+ 'Positive': scores[2]
27
  }
28
  return scores_dict
29
 
30
+ # Gradio interface function
31
+ def analyze_review(review_text):
32
+ # Analyze the review
33
+ scores = polarity_scores_roberta(review_text)
34
 
35
+ # Determine the sentiment
36
+ sentiment = max(scores, key=scores.get)
 
 
 
37
 
38
+ return f"The sentiment is {sentiment}.\n\nScores:\n- Negative: {scores['Negative']:.2f}\n- Neutral: {scores['Neutral']:.2f}\n- Positive: {scores['Positive']:.2f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ # Gradio Interface
41
  gr.Interface(
42
+ fn=analyze_review,
43
+ inputs=gr.Textbox(lines=5, placeholder="Enter your review here..."),
44
+ outputs=gr.Textbox(),
 
 
 
 
 
45
  title="Review Sentiment Analysis with RoBERTa",
46
+ description="Enter a review and get the sentiment analysis using a RoBERTa model.",
47
+ ).launch()