trttung1610 committed on
Commit
1d5d45c
·
1 Parent(s): 6be9d7e

Upload main_v2_en.py

Browse files
Files changed (1) hide show
  1. main_v2_en.py +213 -0
main_v2_en.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import RobertaForSequenceClassification, AutoTokenizer, pipeline
2
+ import torch
3
+ import nltk
4
+ import docx2txt
5
+ import pandas as pd
6
+ import os
7
+ import matplotlib.pyplot as plt
8
+ import openpyxl
9
+ from openpyxl.styles import Font, Color, PatternFill
10
+ from openpyxl.styles.colors import WHITE
11
+ import gradio as gr
12
+
13
# Fetch the NLTK 'punkt' resource when the module is imported.
nltk.download('punkt')

# Load the model and tokenizer from the same pretrained checkpoint.
_SENTIMENT_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment-latest"
senti_model = RobertaForSequenceClassification.from_pretrained(_SENTIMENT_CHECKPOINT)
senti_tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT, use_fast=False)
18
+
19
+ # File read
20
def read_file(docx):
    """Return the non-empty, whitespace-stripped lines of a .docx document.

    Args:
        docx: Document to read; passed straight to ``docx2txt.process``.

    Returns:
        A list of stripped, non-empty lines. When the document cannot be
        read, the error is printed and an empty list is returned, so callers
        can always iterate over the result.
    """
    try:
        text = docx2txt.process(docx)
    except Exception as e:
        # Best-effort reader: report the problem and fall back to "no lines".
        # Falling off the end here used to return None, which made every
        # caller fail later with an unrelated TypeError.
        print(f"Error reading file: {e}")
        return []

    stripped = (line.strip() for line in text.split('\n'))
    return [line for line in stripped if line]
29
+
30
+
31
+ # Define a function to analyze the sentiment of a text
32
def analyze(sentence):
    """Score one piece of text with the module-level sentiment model.

    Args:
        sentence: Text to classify.

    Returns:
        A list of softmax probabilities, one per model label. The rest of
        this file reads them in the order Negative, Neutral, Positive.
    """
    # Without truncation, text longer than the model's position table fails
    # inside the model. RoBERTa-style models keep two position slots for the
    # padding offset, hence the "- 2".
    max_length = senti_model.config.max_position_embeddings - 2
    token_ids = senti_tokenizer.encode(sentence, truncation=True, max_length=max_length)

    # The model expects a batch dimension, so wrap the single sequence.
    input_ids = torch.tensor([token_ids])
    with torch.no_grad():
        out = senti_model(input_ids)
    return out.logits.softmax(dim=-1).tolist()[0]
38
+
39
+
40
def file_analysis(docx):
    """Run sentiment analysis on every non-empty line of a .docx document.

    Args:
        docx: Document to read; forwarded to ``read_file``.

    Returns:
        A list with one ``analyze`` result per line, in document order. The
        list is empty when the document has no text or could not be read.
    """
    # read_file may hand back nothing on a read failure; treat that the same
    # as an empty document rather than failing on iteration.
    lines = read_file(docx) or []
    return [analyze(line) for line in lines]
50
+
51
+
52
def generate_pie_chart(df):
    """Draw the mean Negative/Neutral/Positive scores of *df* as a pie chart.

    Args:
        df: DataFrame with 'Negative', 'Neutral' and 'Positive' columns.

    Returns:
        The name of the saved image, 'pie_chart.png'. The file is written
        under that relative name, overwriting any previous chart.
    """
    sentiment_labels = ['Negative', 'Neutral', 'Positive']
    slice_colors = ['#BDBDBD', '#87CEFA', '#9ACD32']

    # One row per sentiment holding the column average.
    averages = pd.DataFrame({
        'Sentiment': sentiment_labels,
        'Score': [df[label].mean() for label in sentiment_labels],
    })

    plt.pie(
        averages['Score'],
        labels=averages['Sentiment'],
        colors=slice_colors,
        autopct='%1.1f%%',
    )
    plt.title('Average Scores by Sentiment')

    # Save, then close the figure so the next call starts from a clean one.
    output_name = 'pie_chart.png'
    plt.savefig(output_name)
    plt.close()
    return output_name
75
+
76
+
77
def generate_excel_file(df):
    """Write the sentiment scores in *df* to 'result.xlsx' with colour coding.

    Each DataFrame row becomes one worksheet row below a bold header. The
    highest of the three scores is filled with its sentiment colour, and the
    fourth column (the text, when present) is filled with the same colour.

    Args:
        df: DataFrame whose first three columns are 'Negative', 'Neutral'
            and 'Positive'; an optional fourth column holds the text.

    Returns:
        The path of the saved workbook, 'result.xlsx'.
    """
    sentiment_cols = ['Negative', 'Neutral', 'Positive']
    headers = sentiment_cols + ['Text']
    fill_dict = {
        'Negative': PatternFill(start_color='BDBDBD', end_color='BDBDBD', fill_type='solid'),
        'Positive': PatternFill(start_color='9ACD32', end_color='9ACD32', fill_type='solid'),
        'Neutral': PatternFill(start_color='87CEFA', end_color='87CEFA', fill_type='solid'),
    }

    wb = openpyxl.Workbook()
    ws = wb.active

    # Header row.
    for col_num, header in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_num)
        cell.value = header
        cell.font = Font(bold=True)

    # Data rows start at worksheet row 2. Counting rows positionally keeps
    # the layout right even if the DataFrame index is not 0..n-1.
    for sheet_row, (_, row_data) in enumerate(df.iterrows(), start=2):
        scores = [row_data[col] for col in sentiment_cols]
        max_score = max(scores)
        sentiment = sentiment_cols[scores.index(max_score)]
        fill = fill_dict[sentiment]

        # openpyxl stores 6-digit colours as aRGB ('00BDBDBD'), so comparing
        # fill.start_color.rgb with 'BDBDBD' never matched and the white font
        # was never applied. Decide on the sentiment name instead.
        font_color = WHITE if sentiment == 'Negative' else Color('000000')

        for col_num, col_data in enumerate(row_data, 1):
            cell = ws.cell(row=sheet_row, column=col_num)
            cell.value = col_data
            if col_num <= 3:
                # Highlight the winning score(s) only.
                if col_data == max_score:
                    cell.fill = fill
            elif col_num == 4:
                cell.fill = fill
                cell.font = Font(color=font_color)

    excel_file_path = 'result.xlsx'
    wb.save(excel_file_path)
    return excel_file_path
125
+
126
+
127
def process_file(docx):
    """Analyse a .docx document and produce the chart and workbook for it.

    Args:
        docx: Document to read; forwarded to ``read_file``.

    Returns:
        A ``(pie_chart_name, excel_file_path)`` tuple.

    Raises:
        ValueError: If no text could be read from the document.
    """
    # Read the document once and reuse the lines for both the scores and the
    # 'Text' column. Parsing it twice was wasted work and relied on both reads
    # returning the same lines.
    lines = read_file(docx) or []
    if not lines:
        # Fail with a clear message instead of an obscure error further down
        # when the chart is drawn from an empty table.
        raise ValueError("No text could be read from the uploaded document.")

    df = pd.DataFrame(
        [analyze(line) for line in lines],
        columns=['Negative', 'Neutral', 'Positive'],
    )
    df['Text'] = lines

    pie_chart_name = generate_pie_chart(df)
    excel_file_path = generate_excel_file(df)
    return pie_chart_name, excel_file_path
140
+
141
def analyze_file(file, sentence):
    """Gradio callback: analyse an uploaded file or, failing that, the text.

    When a file is supplied it takes precedence and the text is ignored.

    Args:
        file: Uploaded file object exposing a ``name`` path attribute, or a
            falsy value when nothing was uploaded.
        sentence: Free text to analyse when no file is supplied.

    Returns:
        A ``(excel_file_path, pie_chart_name)`` tuple. Both are ``None``
        when neither input was provided.
    """
    if file:
        # The outputs come from the file alone, so the text is not scored
        # when a file is present.
        pie_chart_name, excel_file_path = process_file(file.name)
        return excel_file_path, pie_chart_name

    if sentence:
        # Build the single-row table once and share it between both outputs.
        df = pd.DataFrame([analyze(sentence)], columns=['Negative', 'Neutral', 'Positive'])
        pie_chart_name = generate_pie_chart(df)
        excel_file_path = generate_excel_file(df)
        return excel_file_path, pie_chart_name

    # Nothing to analyse: return one value per declared output rather than
    # an implicit bare None.
    return None, None
191
+
192
# Input widgets, in the order analyze_file receives them: (file, sentence).
inputs = [
    gr.inputs.File(label="Select File for Analysis"),
    gr.inputs.Textbox(label="Enter Text"),
]

# Output widgets, in the order analyze_file returns them:
# (excel_file_path, pie_chart_name).
outputs = [
    gr.outputs.File(label="Analysis Result Excel"),
    gr.outputs.Image(type="filepath", label="Analysis Metrics"),
]

interface = gr.Interface(
    fn=analyze_file,
    inputs=inputs,
    outputs=outputs,
    title="Sentiment Analysis",
    allow_flagging="never",  # Disable flag button
)


if __name__ == "__main__":
    # Only start the web UI when run as a script, not on import.
    interface.launch(share=True)