aliasgerovs committed on
Commit
aa77524
·
2 Parent(s): f38783b dee0f90

Merged changes.

Browse files
Files changed (4) hide show
  1. app.py +18 -1
  2. highlighter.py +43 -0
  3. predictors.py +9 -0
  4. requirements.txt +2 -1
app.py CHANGED
@@ -5,6 +5,7 @@ from predictors import predict_bc_scores, predict_mc_scores, predict_1on1_scores
5
  from analysis import depth_analysis
6
  from predictors import predict_quillbot
7
  from plagiarism import plagiarism_check, build_date
 
8
  from utils import extract_text_from_pdf, len_validator
9
  import yaml
10
 
@@ -137,6 +138,9 @@ with gr.Blocks() as demo:
137
  with gr.Row():
138
  quillbot_check = gr.Button("Humanized Text Check")
139
 
 
 
 
140
  with gr.Row():
141
  depth_analysis_btn = gr.Button("Detailed Writing Analysis")
142
 
@@ -156,8 +160,13 @@ with gr.Blocks() as demo:
156
  mcLabel = gr.Label(label="Creator")
157
  # with gr.Column():
158
  # mc1on1Label = gr.Label(label="Creator(1 on 1 Approach)")
 
159
  with gr.Row():
160
- QLabel = gr.Label(label="Humanized")
 
 
 
 
161
  with gr.Group():
162
  with gr.Row():
163
  month_from = gr.Dropdown(
@@ -271,6 +280,14 @@ with gr.Blocks() as demo:
271
  api_name="depth_analysis",
272
  )
273
 
 
 
 
 
 
 
 
 
274
  date_from = ""
275
  date_to = ""
276
 
 
5
  from analysis import depth_analysis
6
  from predictors import predict_quillbot
7
  from plagiarism import plagiarism_check, build_date
8
+ from highlighter import analyze_and_highlight
9
  from utils import extract_text_from_pdf, len_validator
10
  import yaml
11
 
 
138
  with gr.Row():
139
  quillbot_check = gr.Button("Humanized Text Check")
140
 
141
+ with gr.Row():
142
+ quillbot_highlighter = gr.Button("Humanized Highlighter")
143
+
144
  with gr.Row():
145
  depth_analysis_btn = gr.Button("Detailed Writing Analysis")
146
 
 
160
  mcLabel = gr.Label(label="Creator")
161
  # with gr.Column():
162
  # mc1on1Label = gr.Label(label="Creator(1 on 1 Approach)")
163
+
164
  with gr.Row():
165
+ with gr.Column():
166
+ QLabel = gr.Label(label="Humanized")
167
+ with gr.Column():
168
+ highlighter_html = gr.HTML(label='Humanized Highlighter')
169
+
170
  with gr.Group():
171
  with gr.Row():
172
  month_from = gr.Dropdown(
 
280
  api_name="depth_analysis",
281
  )
282
 
283
+ quillbot_highlighter.click(
284
+ fn=analyze_and_highlight,
285
+ inputs=[input_text],
286
+ outputs=[highlighter_html],
287
+ api_name="quillbot_highlighter",
288
+ )
289
+
290
+
291
  date_from = ""
292
  date_to = ""
293
 
highlighter.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from lime.lime_text import LimeTextExplainer
2
+ from nltk.tokenize import sent_tokenize
3
+ from predictors import predict_proba_quillbot
4
+
5
+
6
def explainer(text):
    """Estimate how much each sentence of *text* pushes the 'positive' class.

    The text is split into sentences with ``sent_tokenize`` and handed to a
    LIME text explainer that perturbs it sentence by sentence, scoring every
    variant with ``predict_proba_quillbot``.

    Args:
        text: The raw input text to explain.

    Returns:
        A dict mapping each distinct sentence to its LIME weight for label
        index 1 ('positive' in ``class_names``). Sentences LIME did not
        report keep a weight of 0. An empty dict is returned when the text
        contains no sentences.
    """
    # LIME's default bag-of-words mode creates one feature per *distinct*
    # token, numbered in order of first appearance. The lookup list has to be
    # de-duplicated the same way, otherwise a repeated sentence shifts every
    # later index onto the wrong sentence.
    sentences = list(dict.fromkeys(sent_tokenize(text)))
    sentences_weights = {sentence: 0 for sentence in sentences}
    if not sentences:
        # Nothing to perturb; skip the (expensive) explanation entirely.
        return sentences_weights

    class_names = ['negative', 'positive']
    # Named differently from this function to avoid shadowing it locally.
    lime_explainer = LimeTextExplainer(class_names=class_names, split_expression=sent_tokenize)
    exp = lime_explainer.explain_instance(text, predict_proba_quillbot, num_features=20, num_samples=300)

    # as_map() is keyed by label index; entry 1 holds (feature_id, weight)
    # pairs for the 'positive' class.
    for idx, weight in exp.as_map()[1]:
        if 0 <= idx < len(sentences):
            sentences_weights[sentences[idx]] = weight
    print(sentences_weights)
    return sentences_weights
18
+
19
+
20
def analyze_and_highlight(text):
    """Render *text* as HTML with each sentence shaded by its LIME weight.

    Sentence weights come from ``explainer``. Weights are min-max normalised
    across the text; sentences with a non-negative weight are shaded red
    (stronger for larger weights) and sentences with a negative weight are
    shaded green (stronger for more negative weights).

    Args:
        text: The raw input text to analyse.

    Returns:
        A string of ``<span>`` elements, one per non-empty sentence, each
        followed by a single space. Returns an empty string when there is
        nothing to highlight.
    """
    # Function-scope import so this block does not depend on a file-level one.
    from html import escape

    # Blank input has no sentences; avoid calling the explainer at all.
    if not text or not text.strip():
        return ""

    sentences_weights = explainer(text)
    if not sentences_weights:
        # min()/max() below would raise on an empty mapping.
        return ""

    min_weight = min(sentences_weights.values())
    max_weight = max(sentences_weights.values())
    weight_span = max_weight - min_weight

    highlighted_parts = []
    for sentence, weight in sentences_weights.items():
        sentence = sentence.strip()
        if not sentence:
            continue

        if weight_span > 0:
            normalized_weight = (weight - min_weight) / weight_span
        else:
            # All weights are equal (always the case for a single sentence):
            # the original division would raise ZeroDivisionError. Use full
            # intensity for a non-zero weight and white for a zero weight.
            normalized_weight = 1.0 if weight > 0 else 0.0

        # Round to whole channel values so the CSS colour is always integral.
        if weight >= 0:
            fade = round(255 * (1 - normalized_weight))
            color = f'rgba(255, {fade}, {fade}, 1)'
        else:
            fade = round(255 * normalized_weight)
            color = f'rgba({fade}, 255, {fade}, 1)'

        # The sentence is user-supplied text placed into HTML: escape it so
        # markup in the input is displayed rather than interpreted.
        highlighted_parts.append(
            f'<span style="background-color: {color}; color: black;">{escape(sentence)}</span> '
        )

    return "".join(highlighted_parts)
41
+
42
+
43
+
predictors.py CHANGED
@@ -153,6 +153,15 @@ def predict_quillbot(text):
153
  return q_score
154
 
155
 
 
 
 
 
 
 
 
 
 
156
  def predict_bc(model, tokenizer, text):
157
  with torch.no_grad():
158
  model.eval()
 
153
  return q_score
154
 
155
 
156
def predict_proba_quillbot(text):
    """Return class probabilities from the Quillbot model for *text*.

    Used as the classifier callback for the LIME explainer, which
    presumably passes a batch (list) of perturbed strings -- the tokenizer
    call below accepts either a single string or a list.

    Args:
        text: A string or a list of strings to score.

    Returns:
        A NumPy array of softmax probabilities computed over the last axis
        of the model's logits.
    """
    with torch.no_grad():
        # Same inference-mode call the sibling predict_bc makes.
        quillbot_model.eval()
        # truncation=True keeps over-length inputs within the tokenizer's
        # configured maximum instead of feeding them through unbounded.
        # NOTE(review): confirm this matches how predict_quillbot tokenizes.
        tokenized_text = quillbot_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        ).to(device)
        outputs = quillbot_model(**tokenized_text)
        tensor_logits = outputs[0]
        # Explicit dim: calling softmax without it is deprecated and warns.
        probas = F.softmax(tensor_logits, dim=-1).detach().cpu().numpy()
    return probas
163
+
164
+
165
  def predict_bc(model, tokenizer, text):
166
  with torch.no_grad():
167
  model.eval()
requirements.txt CHANGED
@@ -23,4 +23,5 @@ tqdm
23
  pymupdf
24
  sentence-transformers
25
  Unidecode
26
- python-dotenv
 
 
23
  pymupdf
24
  sentence-transformers
25
  Unidecode
26
+ python-dotenv
27
+ lime