Spaces:

polygraf-ai
/

copyright_checker

Runtime error

App Files Files Community

aliasgerovs committed on Mar 15, 2024

Commit

aa77524

2 Parent(s): f38783b dee0f90

Merged changes.

Browse files

Files changed (4) hide show

app.py +18 -1
highlighter.py +43 -0
predictors.py +9 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from predictors import predict_bc_scores, predict_mc_scores, predict_1on1_scores
 from analysis import depth_analysis
 from predictors import predict_quillbot
 from plagiarism import plagiarism_check, build_date
 from utils import extract_text_from_pdf, len_validator
 import yaml
@@ -137,6 +138,9 @@ with gr.Blocks() as demo:
     with gr.Row():
         quillbot_check = gr.Button("Humanized Text Check")
     with gr.Row():
         depth_analysis_btn = gr.Button("Detailed Writing Analysis")
@@ -156,8 +160,13 @@ with gr.Blocks() as demo:
             mcLabel = gr.Label(label="Creator")
         # with gr.Column():
         #     mc1on1Label = gr.Label(label="Creator(1 on 1 Approach)")
     with gr.Row():
-        QLabel = gr.Label(label="Humanized")
     with gr.Group():
         with gr.Row():
             month_from = gr.Dropdown(
@@ -271,6 +280,14 @@ with gr.Blocks() as demo:
         api_name="depth_analysis",
     )
     date_from = ""
     date_to = ""

 from analysis import depth_analysis
 from predictors import predict_quillbot
 from plagiarism import plagiarism_check, build_date
+from highlighter import analyze_and_highlight
 from utils import extract_text_from_pdf, len_validator
 import yaml
     with gr.Row():
         quillbot_check = gr.Button("Humanized Text Check")
+    with gr.Row():
+        quillbot_highlighter = gr.Button("Humanized Highlighter")
     with gr.Row():
         depth_analysis_btn = gr.Button("Detailed Writing Analysis")
             mcLabel = gr.Label(label="Creator")
         # with gr.Column():
         #     mc1on1Label = gr.Label(label="Creator(1 on 1 Approach)")
     with gr.Row():
+        with gr.Column():
+            QLabel = gr.Label(label="Humanized")
+        with gr.Column():
+            highlighter_html = gr.HTML(label='Humanized Highlighter')
     with gr.Group():
         with gr.Row():
             month_from = gr.Dropdown(
         api_name="depth_analysis",
     )
+    quillbot_highlighter.click(
+        fn=analyze_and_highlight,
+        inputs=[input_text],
+        outputs=[highlighter_html],
+        api_name="quillbot_highlighter",
+    )
     date_from = ""
     date_to = ""

highlighter.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from lime.lime_text import LimeTextExplainer
+from nltk.tokenize import sent_tokenize
+from predictors import predict_proba_quillbot
+def explainer(text):
+    """Map each sentence of *text* to its LIME weight for class index 1.
+
+    The text is split with ``sent_tokenize`` and every sentence is treated as
+    one LIME feature. ``predict_proba_quillbot`` is used as the classifier
+    function for the perturbed samples.
+
+    Args:
+        text: Raw input text to explain.
+
+    Returns:
+        A dict keyed by sentence (in first-occurrence order) whose values are
+        the LIME weights; sentences LIME did not report keep a weight of 0.
+        An empty dict is returned when no sentence can be extracted.
+    """
+    # Identical sentences are collapsed: the result is keyed by sentence text,
+    # and LIME's default bag-of-words mode numbers features by distinct token
+    # in first-occurrence order, so indexing the raw (non-deduplicated) list
+    # would attach weights to the wrong sentence after the first repeat.
+    # NOTE(review): relies on LimeTextExplainer's default bow=True - confirm.
+    unique_sentences = list(dict.fromkeys(sent_tokenize(text)))
+    if not unique_sentences:
+        # Nothing to perturb; avoid calling LIME on an empty document.
+        return {}
+    class_names = ['negative', 'positive']
+    # Named distinctly so the local does not shadow this function.
+    lime_explainer = LimeTextExplainer(class_names=class_names, split_expression=sent_tokenize)
+    exp = lime_explainer.explain_instance(text, predict_proba_quillbot, num_features=20, num_samples=300)
+    sentences_weights = dict.fromkeys(unique_sentences, 0)
+    for idx, weight in exp.as_map()[1]:
+        if 0 <= idx < len(unique_sentences):
+            sentences_weights[unique_sentences[idx]] = weight
+    print(sentences_weights)
+    return sentences_weights
+def analyze_and_highlight(text):
+    """Render *text* as HTML with each sentence shaded by its LIME weight.
+
+    Weights come from ``explainer(text)`` and are min-max normalised across
+    the sentences. Sentences with a weight >= 0 are shaded towards red and
+    negative ones towards green; the text colour is always black.
+
+    Args:
+        text: Raw input text to analyse.
+
+    Returns:
+        A string of ``<span>`` elements, one per non-blank sentence, each
+        followed by a space. Returns an empty string when there are no
+        sentences to render.
+    """
+    # Local import keeps this block self-contained within the module.
+    import html
+
+    sentences_weights = explainer(text)
+    if not sentences_weights:
+        # min()/max() below would raise on an empty mapping.
+        return ""
+    min_weight = min(sentences_weights.values())
+    max_weight = max(sentences_weights.values())
+    weight_range = max_weight - min_weight
+    highlighted_parts = []
+    for sentence, weight in sentences_weights.items():
+        sentence = sentence.strip()
+        if not sentence:
+            continue
+        if weight_range == 0:
+            # All weights are equal (e.g. a single sentence): there is no
+            # contrast to show and normalising would divide by zero.
+            color = 'rgba(255, 255, 255, 1)'
+        else:
+            normalized_weight = (weight - min_weight) / weight_range
+            if weight >= 0:
+                color = f'rgba(255, {255 * (1 - normalized_weight)}, {255 * (1 - normalized_weight)}, 1)'
+            else:
+                color = f'rgba({255 * normalized_weight}, 255, {255 * normalized_weight}, 1)'
+        # The sentence is user-supplied text placed inside markup, so it is
+        # escaped to stop it being interpreted as HTML.
+        highlighted_parts.append(
+            f'<span style="background-color: {color}; color: black;">{html.escape(sentence)}</span> '
+        )
+    return "".join(highlighted_parts)

predictors.py CHANGED Viewed

@@ -153,6 +153,15 @@ def predict_quillbot(text):
         return q_score
 def predict_bc(model, tokenizer, text):
     with torch.no_grad():
         model.eval()

         return q_score
+def predict_proba_quillbot(text):
+    """Return softmax class probabilities from the quillbot model for *text*.
+
+    Args:
+        text: Input forwarded unchanged to ``quillbot_tokenizer``.
+            NOTE(review): presumably a string or a list of strings, since
+            ``padding=True`` is set - confirm against the callers.
+
+    Returns:
+        A NumPy array holding the softmax of the model's first output,
+        taken over the last axis.
+    """
+    with torch.no_grad():
+        tokenized_text = quillbot_tokenizer(text, return_tensors="pt", padding=True).to(device)
+        outputs = quillbot_model(**tokenized_text)
+        tensor_logits = outputs[0]
+        # dim is given explicitly: calling softmax without it is deprecated
+        # and emits a warning about the implicit dimension choice.
+        probas = F.softmax(tensor_logits, dim=-1).detach().cpu().numpy()
+    return probas
 def predict_bc(model, tokenizer, text):
     with torch.no_grad():
         model.eval()

requirements.txt CHANGED Viewed

@@ -23,4 +23,5 @@ tqdm
 pymupdf
 sentence-transformers
 Unidecode
-python-dotenv

 pymupdf
 sentence-transformers
 Unidecode
+python-dotenv
+lime