Commit f45e494 (Parent(s): 74f95a7)
aliasgerovs committed: Updated

Files changed:
- app.py +15 -9
- highlighter.py +4 -1
- nohup.out +19 -94
- predictors.py +11 -2
app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import numpy as np
 from datetime import date
 from predictors import predict_bc_scores, predict_mc_scores
-from predictors import update, correct_text, split_text
+from predictors import update,update_main, correct_text, split_text
 from analysis import depth_analysis
 from predictors import predict_quillbot
 from plagiarism import plagiarism_check, build_date, html_highlight
@@ -26,7 +26,9 @@ analyze_and_highlight_quillbot = partial(
 )
 
 
-def ai_generated_test(option, input, models):
+def ai_generated_test(option, bias_buster_selected, input, models):
+    if bias_buster_selected:
+        input = update(input)
     if option == "Human vs AI":
         return predict_bc_scores(input), None
     elif option == "Human vs AI Source Models":
@@ -132,9 +134,9 @@ with gr.Blocks() as demo:
     btn = gr.Button("Bias Buster")
     out = gr.Textbox(label="Bias Corrected Full Input", interactive=False)
     corrections_output = gr.Textbox(label="Bias Corrections", interactive=False)
-    btn.click(fn=update, inputs=input_text, outputs=[out, corrections_output])
+    btn.click(fn=update_main, inputs=input_text, outputs=[out, corrections_output])
 
-    with gr.Row():
+    with gr.Row():
         models = gr.Dropdown(
             model_list,
             value=model_list,
@@ -151,6 +153,10 @@ with gr.Blocks() as demo:
             ],
             label="Choose an option please.",
         )
+
+        with gr.Column():
+            bias_buster_selected = gr.Checkbox(label="Bias Remover")
+
         with gr.Column():
             plag_option = gr.Radio(
                 ["Standard", "Advanced"], label="Choose an option please."
@@ -331,7 +337,7 @@ with gr.Blocks() as demo:
 
     only_ai_btn.click(
         fn=ai_generated_test,
-        inputs=[ai_option, input_text, models],
+        inputs=[ai_option, bias_buster_selected, input_text, models],
         # outputs=[bcLabel, mcLabel, mc1on1Label],
         outputs=[bcLabel, mcLabel],
         api_name="ai_check",
@@ -339,7 +345,7 @@ with gr.Blocks() as demo:
 
     quillbot_check.click(
         fn=predict_quillbot,
-        inputs=[input_text],
+        inputs=[input_text, bias_buster_selected],
         outputs=[QLabel],
         api_name="quillbot_check",
     )
@@ -367,21 +373,21 @@ with gr.Blocks() as demo:
 
     depth_analysis_btn.click(
         fn=depth_analysis,
-        inputs=[input_text],
+        inputs=[bias_buster_selected, input_text],
        outputs=[writing_analysis_plot],
         api_name="depth_analysis",
     )
 
     quillbot_highlighter_button.click(
         fn=analyze_and_highlight_quillbot,
-        inputs=[input_text],
+        inputs=[input_text, bias_buster_selected],
         outputs=[quillbot_highlighter_output],
         api_name="humanized_highlighter",
     )
 
     bc_highlighter_button.click(
         fn=analyze_and_highlight_bc,
-        inputs=[input_text],
+        inputs=[input_text, bias_buster_selected],
         outputs=[bc_highlighter_output],
         api_name="bc_highlighter",
     )
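Note on the app.py changes: a new bias_buster_selected checkbox is threaded into every event handler so each tool can optionally run the bias corrector over the input before doing its own work, and the Bias Buster button is repointed from update to the new two-output update_main. Gradio passes component values to a handler positionally, so each inputs list must mirror that handler's signature; note the order is not uniform across handlers (depth_analysis receives the flag first, the others receive it after the text). Below is a minimal runnable sketch of the checkbox-gating pattern, with stub predictors standing in for the real ones; component and function names mirror app.py, but the bodies are placeholders.

import gradio as gr

def update(text):  # stand-in for predictors.update (bias correction)
    return text

def predict_bc_scores(text):  # stand-in for the real classifier
    return {"AI": 0.5, "HUMAN": 0.5}

def ai_generated_test(option, bias_buster_selected, input, models):
    # The checkbox value arrives as a plain bool; when ticked, the input
    # is bias-corrected before any scoring runs.
    if bias_buster_selected:
        input = update(input)
    if option == "Human vs AI":
        return predict_bc_scores(input), None

with gr.Blocks() as demo:
    input_text = gr.Textbox()
    bias_buster_selected = gr.Checkbox(label="Bias Remover")
    ai_option = gr.Radio(["Human vs AI"], value="Human vs AI")
    models = gr.Dropdown(["model-a"], value=["model-a"], multiselect=True)
    bcLabel = gr.Label()
    mcLabel = gr.Label()
    only_ai_btn = gr.Button("AI Check")
    # The checkbox must appear in `inputs` at the same position it holds
    # in the handler's signature.
    only_ai_btn.click(
        fn=ai_generated_test,
        inputs=[ai_option, bias_buster_selected, input_text, models],
        outputs=[bcLabel, mcLabel],
    )

# demo.launch()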
highlighter.py
CHANGED
@@ -1,6 +1,7 @@
 from lime.lime_text import LimeTextExplainer
 from nltk.tokenize import sent_tokenize
 from predictors import predict_for_explainanility
+from predictors import update, correct_text, split_text
 
 
 def explainer(text, model_type):
@@ -25,7 +26,9 @@ def explainer(text, model_type):
     return sentences_weights, exp
 
 
-def analyze_and_highlight(text, model_type):
+def analyze_and_highlight(text, bias_buster_selected, model_type):
+    if bias_buster_selected:
+        text = update(text)
 
     highlighted_text = ""
     sentences_weights, _ = explainer(text, model_type)
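Note on the highlighter.py changes: analyze_and_highlight gains the same optional bias-correction step before the LIME explanation runs, so the highlighted sentences come from the corrected text rather than the raw input (of the new import line, only update is used in the hunks shown; correct_text and split_text appear to come along unused). app.py builds its click handlers from this function with functools.partial, so the new second positional parameter still lines up with the Gradio inputs list. A small sketch of that binding follows; the placeholder body and the "bc" value for model_type are assumptions, not taken from the source.

from functools import partial

def analyze_and_highlight(text, bias_buster_selected, model_type):
    # placeholder body; the real function runs the LIME explainer over sentences
    return f"{model_type}: {text!r} (bias-corrected={bias_buster_selected})"

# model_type is presumably bound as a keyword, leaving (text, bias_buster_selected)
# as the two positional slots Gradio fills from
# inputs=[input_text, bias_buster_selected], in that order.
analyze_and_highlight_bc = partial(analyze_and_highlight, model_type="bc")
print(analyze_and_highlight_bc("some text", True))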
nohup.out
CHANGED
@@ -1,99 +1,24 @@
-
-
-
-
-
-
-[nltk_data] Package punkt is already up-to-date!
-[nltk_data] Downloading package stopwords to /root/nltk_data...
-[nltk_data] Package stopwords is already up-to-date!
-The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
-[the BetterTransformer warning above repeats four more times]
-[nltk_data] Downloading package cmudict to /root/nltk_data...
-[nltk_data] Package cmudict is already up-to-date!
-[nltk_data] Downloading package punkt to /root/nltk_data...
-[nltk_data] Package punkt is already up-to-date!
-[nltk_data] Downloading package stopwords to /root/nltk_data...
-[nltk_data] Package stopwords is already up-to-date!
-[nltk_data] Downloading package wordnet to /root/nltk_data...
-[nltk_data] Package wordnet is already up-to-date!
-/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
-  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
-Collecting en_core_web_sm==2.3.1
-  Using cached en_core_web_sm-2.3.1-py3-none-any.whl
-Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /usr/local/lib/python3.9/dist-packages (from en_core_web_sm==2.3.1) (2.3.9)
-Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.9)
-Requirement already satisfied: thinc<7.5.0,>=7.4.1 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.6)
-Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (52.0.0)
-Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.10)
-Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.8)
-Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.7.11)
-Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.10.1)
-Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.66.2)
-Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.26.4)
-Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.1)
-Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)
-Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)
-Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.7)
-✔ Download and installation successful
-You can now load the model via spacy.load('en_core_web_sm')
+Original BC scores: AI: 0.983885645866394, HUMAN: 0.01611432246863842
+Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
+Input Text: <s>Operation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead. </s>
+
+correcting text..:   0%|          | 0/2 [00:00<?, ?it/s]
+correcting text..: 100%|██████████| 2/2 [00:00<00:00, 29.39it/s]
 Traceback (most recent call last):
   File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
     response = await route_utils.call_process_api(
   File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 270, in call_process_api
     output = await app.get_blocks().process_api(
-  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line
-  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line
-  File "/usr/local/lib/python3.9/dist-packages/
-[remaining frames truncated in the original]
-    corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
-NameError: name 'bias_checker' is not defined
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-	- Avoid using `tokenizers` before the fork if possible
-	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-/usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
-  warnings.warn("Can't initialize NVML")
-IMPORTANT: You are using gradio version 4.28.3, however version 4.29.0 is available, please upgrade.
---------
-Running on local URL: http://0.0.0.0:80
-Running on public URL: https://a5b565cd42a2675e81.gradio.live
-
-This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
-["OpenAI's chief scientist and co-founder, Ilya Sutskever, is leaving the artificial-intelligence company about six months after he voted to fire Chief Executive Sam Altman only to say he regretted the move days later"]
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-	- Avoid using `tokenizers` before the fork if possible
-	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-[the four-line tokenizers warning block above repeats five more times]
+  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1856, in process_api
+    data = await self.postprocess_data(fn_index, result["prediction"], state)
+  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1634, in postprocess_data
+    self.validate_outputs(fn_index, predictions)  # type: ignore
+  File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1610, in validate_outputs
+    raise ValueError(
+ValueError: An event handler (update) didn't receive enough output values (needed: 2, received: 1).
+Wanted outputs:
+    [<gradio.components.textbox.Textbox object at 0x7f79abf202b0>, <gradio.components.textbox.Textbox object at 0x7f79abf20a60>]
+Received outputs:
+    ["Operation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead."]
+/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
+  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
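The nohup.out diff documents the bug this commit addresses: the old log captures a NameError because bias_checker was undefined, while the new log captures the next failure mode, a ValueError raised by Gradio because the update handler, still wired to the two Textboxes out and corrections_output, now returns a single string. That mismatch is exactly why app.py repoints the Bias Buster button at update_main, which keeps the two-value return. A minimal sketch reproducing the error follows; the component names are hypothetical, and the error fires when the button is clicked, not at build time.

import gradio as gr

def update(text):
    return text  # one return value...

with gr.Blocks() as demo:
    inp = gr.Textbox()
    out = gr.Textbox()
    corrections = gr.Textbox()
    btn = gr.Button("Go")
    # ...wired to two output components, so on click Gradio raises:
    # ValueError: An event handler (update) didn't receive enough output
    # values (needed: 2, received: 1).
    btn.click(fn=update, inputs=inp, outputs=[out, corrections])

# demo.launch()  # clicking the button reproduces the traceback above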
predictors.py
CHANGED
@@ -115,12 +115,19 @@ def correct_text(text: str, bias_checker, bias_corrector, separator: str = " ")
     return corrected_text, corrections
 
 def update(text: str):
+    text = clean(text, lower=False)
+    corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
+    corrections_display = "".join([f"{corr}" for orig, corr in corrections])
+    if corrections_display == "":
+        corrections_display = text
+    return corrections_display
+
+def update_main(text: str):
     text = clean(text, lower=False)
     corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
     corrections_display = "\n\n".join([f"Original: {orig}\nCorrected: {corr}" for orig, corr in corrections])
     return corrected_text, corrections_display
 
-
 def split_text_allow_complete_sentences_nltk(
     text,
     max_length=256,
@@ -200,7 +207,9 @@ def split_text_allow_complete_sentences_nltk(
     return decoded_segments
 
 
-def predict_quillbot(text):
+def predict_quillbot(text, bias_buster_selected):
+    if bias_buster_selected:
+        text = update(text)
     with torch.no_grad():
         quillbot_model.eval()
         tokenized_text = quillbot_tokenizer(
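Note on the predictors.py changes: the old two-value update is renamed to update_main (still returning the corrected text plus a readable report for the Bias Buster button's two Textboxes), and update is rewritten to return a single string so it can serve as an inline preprocessing step for the other predictors. A sketch of the two contracts, with correct_text stubbed out and the clean() call omitted; note that update rebuilds its result only from the corrected fragments, so it implicitly relies on correct_text yielding a pair for every segment of the input, and it falls back to the raw text only when there are no corrections at all.

def correct_text(text, bias_checker=None, bias_corrector=None):
    # stub: the real version runs the bias checker/corrector models and
    # returns (corrected_text, [(original, corrected), ...])
    return text, []

def update(text: str) -> str:
    # single value: the corrected text (or the input if nothing changed),
    # suitable for feeding the other predictors
    corrected_text, corrections = correct_text(text)
    corrections_display = "".join(f"{corr}" for _, corr in corrections)
    return corrections_display or text

def update_main(text: str):
    # two values: corrected text plus a human-readable corrections report,
    # matching the Bias Buster button's two Textbox outputs
    corrected_text, corrections = correct_text(text)
    corrections_display = "\n\n".join(
        f"Original: {orig}\nCorrected: {corr}" for orig, corr in corrections
    )
    return corrected_text, corrections_display

print(update("some text"))       # -> "some text"
print(update_main("some text"))  # -> ("some text", "")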