aliasgerovs committed on
Commit f45e494
1 Parent(s): 74f95a7
Files changed (4)
  1. app.py +15 -9
  2. highlighter.py +4 -1
  3. nohup.out +19 -94
  4. predictors.py +11 -2
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import numpy as np
 from datetime import date
 from predictors import predict_bc_scores, predict_mc_scores
-from predictors import update, correct_text, split_text
+from predictors import update,update_main, correct_text, split_text
 from analysis import depth_analysis
 from predictors import predict_quillbot
 from plagiarism import plagiarism_check, build_date, html_highlight
@@ -26,7 +26,9 @@ analyze_and_highlight_quillbot = partial(
 )
 
 
-def ai_generated_test(option, input, models):
+def ai_generated_test(option, bias_buster_selected, input, models):
+    if bias_buster_selected:
+        input = update(input)
     if option == "Human vs AI":
         return predict_bc_scores(input), None
     elif option == "Human vs AI Source Models":
@@ -132,9 +134,9 @@ with gr.Blocks() as demo:
         btn = gr.Button("Bias Buster")
         out = gr.Textbox(label="Bias Corrected Full Input", interactive=False)
         corrections_output = gr.Textbox(label="Bias Corrections", interactive=False)
-        btn.click(fn=update, inputs=input_text, outputs=[out, corrections_output])
+        btn.click(fn=update_main, inputs=input_text, outputs=[out, corrections_output])
 
-    with gr.Row():
+    with gr.Row():
         models = gr.Dropdown(
             model_list,
             value=model_list,
@@ -151,6 +153,10 @@ with gr.Blocks() as demo:
             ],
             label="Choose an option please.",
         )
+
+        with gr.Column():
+            bias_buster_selected = gr.Checkbox(label="Bias Remover")
+
         with gr.Column():
             plag_option = gr.Radio(
                 ["Standard", "Advanced"], label="Choose an option please."
@@ -331,7 +337,7 @@ with gr.Blocks() as demo:
 
     only_ai_btn.click(
         fn=ai_generated_test,
-        inputs=[ai_option, input_text, models],
+        inputs=[ai_option, bias_buster_selected, input_text, models],
         # outputs=[bcLabel, mcLabel, mc1on1Label],
         outputs=[bcLabel, mcLabel],
         api_name="ai_check",
@@ -339,7 +345,7 @@ with gr.Blocks() as demo:
 
     quillbot_check.click(
         fn=predict_quillbot,
-        inputs=[input_text],
+        inputs=[input_text, bias_buster_selected],
        outputs=[QLabel],
        api_name="quillbot_check",
    )
@@ -367,21 +373,21 @@ with gr.Blocks() as demo:
 
    depth_analysis_btn.click(
        fn=depth_analysis,
-       inputs=[input_text],
+       inputs=[bias_buster_selected, input_text],
        outputs=[writing_analysis_plot],
        api_name="depth_analysis",
    )
 
    quillbot_highlighter_button.click(
        fn=analyze_and_highlight_quillbot,
-       inputs=[input_text],
+       inputs=[input_text, bias_buster_selected],
        outputs=[quillbot_highlighter_output],
        api_name="humanized_highlighter",
    )
 
    bc_highlighter_button.click(
        fn=analyze_and_highlight_bc,
-       inputs=[input_text],
+       inputs=[input_text, bias_buster_selected],
        outputs=[bc_highlighter_output],
        api_name="bc_highlighter",
    )
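
The app.py changes all follow one Gradio pattern: a new Checkbox component is appended to the inputs of each event handler, and its boolean value is passed positionally to the handler function. A minimal, self-contained sketch of that pattern, with simplified component names and str.strip() standing in for the real update() bias correction:

    import gradio as gr

    def classify(option: str, bias_buster_selected: bool, text: str) -> str:
        # Gradio passes component values positionally, in the order of `inputs`,
        # so the checkbox state arrives as a plain bool before the text.
        if bias_buster_selected:
            text = text.strip()  # stand-in for the real update() preprocessing
        return f"{option}: {len(text)} characters analyzed"

    with gr.Blocks() as demo:
        ai_option = gr.Radio(["Human vs AI", "Human vs AI Source Models"], label="Mode")
        bias_buster_selected = gr.Checkbox(label="Bias Remover")
        input_text = gr.Textbox(label="Input")
        result = gr.Textbox(label="Result")
        only_ai_btn = gr.Button("AI Check")
        only_ai_btn.click(
            fn=classify,
            inputs=[ai_option, bias_buster_selected, input_text],
            outputs=[result],
        )

    if __name__ == "__main__":
        demo.launch()
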
highlighter.py CHANGED
@@ -1,6 +1,7 @@
 from lime.lime_text import LimeTextExplainer
 from nltk.tokenize import sent_tokenize
 from predictors import predict_for_explainanility
+from predictors import update, correct_text, split_text
 
 
 def explainer(text, model_type):
@@ -25,7 +26,9 @@ def explainer(text, model_type):
     return sentences_weights, exp
 
 
-def analyze_and_highlight(text, model_type):
+def analyze_and_highlight(text, bias_buster_selected, model_type):
+    if bias_buster_selected:
+        text = update(text)
 
     highlighted_text = ""
     sentences_weights, _ = explainer(text, model_type)
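
The new bias_buster_selected parameter sits before model_type because app.py wraps this function in partial objects (analyze_and_highlight_quillbot, analyze_and_highlight_bc) and then passes inputs=[input_text, bias_buster_selected] positionally. A sketch of why the order matters; the "quillbot" value and stub body here are illustrative assumptions, not taken from the diff:

    from functools import partial

    def analyze_and_highlight(text, bias_buster_selected, model_type):
        # Stub body; the real function runs the LIME explainer and builds HTML.
        return f"model={model_type}, debiased={bias_buster_selected}: {text}"

    # app.py pre-binds model_type as a keyword, leaving the first two positional
    # slots free for Gradio to fill from inputs=[input_text, bias_buster_selected].
    analyze_and_highlight_quillbot = partial(analyze_and_highlight, model_type="quillbot")

    print(analyze_and_highlight_quillbot("some essay text", True))
    # equivalent to analyze_and_highlight("some essay text", True, model_type="quillbot")
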
nohup.out CHANGED
@@ -1,99 +1,24 @@
-/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
-  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
-2024-05-15 14:59:12.669109: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
-To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
-2024-05-15 14:59:14.457459: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
-[nltk_data] Downloading package punkt to /root/nltk_data...
-[nltk_data]   Package punkt is already up-to-date!
-[nltk_data] Downloading package stopwords to /root/nltk_data...
-[nltk_data]   Package stopwords is already up-to-date!
-The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
-The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
-The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
-The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
-The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
-[nltk_data] Downloading package cmudict to /root/nltk_data...
-[nltk_data]   Package cmudict is already up-to-date!
-[nltk_data] Downloading package punkt to /root/nltk_data...
-[nltk_data]   Package punkt is already up-to-date!
-[nltk_data] Downloading package stopwords to /root/nltk_data...
-[nltk_data]   Package stopwords is already up-to-date!
-[nltk_data] Downloading package wordnet to /root/nltk_data...
-[nltk_data]   Package wordnet is already up-to-date!
-/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
-  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
-Collecting en_core_web_sm==2.3.1
-  Using cached en_core_web_sm-2.3.1-py3-none-any.whl
-Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /usr/local/lib/python3.9/dist-packages (from en_core_web_sm==2.3.1) (2.3.9)
-Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.9)
-Requirement already satisfied: thinc<7.5.0,>=7.4.1 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.6)
-Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (52.0.0)
-Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.10)
-Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.8)
-Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.7.11)
-Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.10.1)
-Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.66.2)
-Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.26.4)
-Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.1)
-Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)
-Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)
-Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.7)
-✔ Download and installation successful
-You can now load the model via spacy.load('en_core_web_sm')
+Original BC scores: AI: 0.983885645866394, HUMAN: 0.01611432246863842
+Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
+Input Text: sOperation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead. /s
+
+correcting text..:   0%|          | 0/2 [00:00<?, ?it/s]
+correcting text..: 100%|██████████| 2/2 [00:00<00:00, 29.39it/s]
 Traceback (most recent call last):
   File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
     response = await route_utils.call_process_api(
   File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 270, in call_process_api
     output = await app.get_blocks().process_api(
-  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1847, in process_api
-    result = await self.call_function(
-  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1433, in call_function
-    prediction = await anyio.to_thread.run_sync(
-  File "/usr/local/lib/python3.9/dist-packages/anyio/to_thread.py", line 56, in run_sync
-    return await get_async_backend().run_sync_in_worker_thread(
-  File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
-    return await future
-  File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
-    result = context.run(func, *args)
-  File "/usr/local/lib/python3.9/dist-packages/gradio/utils.py", line 788, in wrapper
-    response = f(*args, **kwargs)
-  File "/home/aliasgarov/copyright_checker/predictors.py", line 119, in update
-    corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
-NameError: name 'bias_checker' is not defined
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-  - Avoid using `tokenizers` before the fork if possible
-  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-/usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
-  warnings.warn("Can't initialize NVML")
-IMPORTANT: You are using gradio version 4.28.3, however version 4.29.0 is available, please upgrade.
---------
-Running on local URL: http://0.0.0.0:80
-Running on public URL: https://a5b565cd42a2675e81.gradio.live
-
-This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
-["OpenAI's chief scientist and co-founder, Ilya Sutskever, is leaving the artificial-intelligence company about six months after he voted to fire Chief Executive Sam Altman only to say he regretted the move days later"]
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-  - Avoid using `tokenizers` before the fork if possible
-  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-  - Avoid using `tokenizers` before the fork if possible
-  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-  - Avoid using `tokenizers` before the fork if possible
-  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-  - Avoid using `tokenizers` before the fork if possible
-  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-  - Avoid using `tokenizers` before the fork if possible
-  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-  - Avoid using `tokenizers` before the fork if possible
-  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1856, in process_api
+    data = await self.postprocess_data(fn_index, result["prediction"], state)
+  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1634, in postprocess_data
+    self.validate_outputs(fn_index, predictions)  # type: ignore
+  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1610, in validate_outputs
+    raise ValueError(
+ValueError: An event handler (update) didn't receive enough output values (needed: 2, received: 1).
+Wanted outputs:
+    [<gradio.components.textbox.Textbox object at 0x7f79abf202b0>, <gradio.components.textbox.Textbox object at 0x7f79abf20a60>]
+Received outputs:
+    ["Operation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead."]
+/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
+  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
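
The ValueError in the new log is the failure this commit addresses: update() was wired to two output Textboxes but returned only one value once it became a single-string preprocessor. A minimal reproduction of Gradio's output contract, assuming nothing beyond stock Gradio behavior:

    import gradio as gr

    def update(text):
        return text  # one value, but the click handler below declares two outputs

    with gr.Blocks() as demo:
        input_text = gr.Textbox(label="Input")
        out = gr.Textbox(label="Bias Corrected Full Input")
        corrections_output = gr.Textbox(label="Bias Corrections")
        btn = gr.Button("Bias Buster")
        # On click, Gradio raises: "An event handler (update) didn't receive
        # enough output values (needed: 2, received: 1)."
        btn.click(fn=update, inputs=input_text, outputs=[out, corrections_output])

Hence the split in predictors.py below: the button keeps a two-value handler (update_main), while the checkbox path reuses the single-value update().
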
predictors.py CHANGED
@@ -115,12 +115,19 @@ def correct_text(text: str, bias_checker, bias_corrector, separator: str = " ")
     return corrected_text, corrections
 
 def update(text: str):
+    text = clean(text, lower=False)
+    corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
+    corrections_display = "".join([f"{corr}" for orig, corr in corrections])
+    if corrections_display == "":
+        corrections_display = text
+    return corrections_display
+
+def update_main(text: str):
     text = clean(text, lower=False)
     corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
     corrections_display = "\n\n".join([f"Original: {orig}\nCorrected: {corr}" for orig, corr in corrections])
     return corrected_text, corrections_display
 
-
 def split_text_allow_complete_sentences_nltk(
     text,
     max_length=256,
@@ -200,7 +207,9 @@ def split_text_allow_complete_sentences_nltk(
     return decoded_segments
 
 
-def predict_quillbot(text):
+def predict_quillbot(text, bias_buster_selected):
+    if bias_buster_selected:
+        text = update(text)
     with torch.no_grad():
         quillbot_model.eval()
         tokenized_text = quillbot_tokenizer(
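
A hypothetical call site contrasting the two helpers' contracts (the sample sentence is invented for illustration; both functions rely on the module-level bias_checker and bias_corrector models in predictors.py):

    from predictors import update, update_main

    # Single-value contract: used as an inline preprocessor by ai_generated_test,
    # predict_quillbot, and analyze_and_highlight.
    corrected = update("He is a better engineer than her.")
    print(corrected)

    # Two-value contract: matches the Bias Buster button's two output Textboxes.
    corrected_text, corrections_display = update_main("He is a better engineer than her.")
    print(corrections_display)
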