ali committed
Commit 29edf23 · verified · 1 Parent(s): cbfc0d6

Update app.py

Files changed (1)
  1. app.py +68 -109
app.py CHANGED
@@ -1,26 +1,33 @@
-# import dependencies
+# Import dependencies
 import gradio as gr
-from openai import OpenAI
-import os
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
+import torch
+import nltk
 import random
 import string
 
-# define the openai key
-api_key = os.getenv("OPENAI_API_KEY")
+# Download NLTK data (if not already downloaded)
+nltk.download('punkt')
+nltk.download('stopwords')
 
-# make an instance of the openai client
-client = OpenAI(api_key = api_key)
+# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
+tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 
-# finetuned model instance
-finetuned_model = "ft:gpt-3.5-turbo-0125:noaigpt::9yy0fWeK"
+# Load SRDdev Paraphrase model and tokenizer for humanizing text
+paraphrase_tokenizer = T5Tokenizer.from_pretrained("SRDdev/Paraphrase")
+paraphrase_model = T5ForConditionalGeneration.from_pretrained("SRDdev/Paraphrase")
 
+# AI detection function using DistilBERT
+def detect_ai_generated(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probabilities = torch.softmax(outputs.logits, dim=1)
+    ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
+    return ai_probability
 
-
-
-
-
-
-# text processing functions
+# Random text transformations to simulate human-like errors
 def random_capitalize(word):
     if word.isalpha() and random.random() < 0.1:
         return word.capitalize()
@@ -52,37 +59,25 @@ def random_double_space(text):
     return text
 
 def random_replace_comma_space(text, period_replace_percentage=0.33):
-
-    # Count occurrences
-    comma_occurrences = text.count(", ")
-    period_occurrences = text.count(". ")
-
-    # Replacements
-    replace_count_comma = max(1, comma_occurrences // 3)
-    replace_count_period = max(1, period_occurrences // 3)
-
-    # Find indices
-    comma_indices = [i for i in range(len(text)) if text.startswith(", ", i)]
-    period_indices = [i for i in range(len(text)) if text.startswith(". ", i)]
-
-    # Sample indices
-    replace_indices_comma = random.sample(comma_indices, min(replace_count_comma, len(comma_indices)))
-    replace_indices_period = random.sample(period_indices, min(replace_count_period, len(period_indices)))
-
-    # Apply replacements
-    for idx in sorted(replace_indices_comma + replace_indices_period, reverse=True):
-        if text.startswith(", ", idx):
-            text = text[:idx] + " ," + text[idx + 2:]
-        if text.startswith(". ", idx):
-            text = text[:idx] + " ." + text[idx + 2:]
-
-    return text
+    comma_occurrences = text.count(", ")
+    period_occurrences = text.count(". ")
+    replace_count_comma = max(1, comma_occurrences // 3)
+    replace_count_period = max(1, period_occurrences // 3)
+    comma_indices = [i for i in range(len(text)) if text.startswith(", ", i)]
+    period_indices = [i for i in range(len(text)) if text.startswith(". ", i)]
+    replace_indices_comma = random.sample(comma_indices, min(replace_count_comma, len(comma_indices)))
+    replace_indices_period = random.sample(period_indices, min(replace_count_period, len(period_indices)))
+    for idx in sorted(replace_indices_comma + replace_indices_period, reverse=True):
+        if text.startswith(", ", idx):
+            text = text[:idx] + " ," + text[idx + 2:]
+        if text.startswith(". ", idx):
+            text = text[:idx] + " ." + text[idx + 2:]
+    return text
 
 def transform_paragraph(paragraph):
     words = paragraph.split()
     if len(words) > 12:
         words = [random_capitalize(word) for word in words]
-
         transformed_paragraph = ' '.join(words)
         transformed_paragraph = random_remove_punctuation(transformed_paragraph)
         transformed_paragraph = random_double_period(transformed_paragraph)
@@ -90,11 +85,6 @@ def transform_paragraph(paragraph):
         transformed_paragraph = random_replace_comma_space(transformed_paragraph)
     else:
        transformed_paragraph = paragraph
-
-    transformed_paragraph = transformed_paragraph.replace("#", "*")
-    transformed_paragraph = transformed_paragraph.replace("*", "")
-    # transformed_paragraph = transformed_paragraph.replace(", ", " ,")
-
    return transformed_paragraph
 
 def transform_text(text):
@@ -102,75 +92,44 @@ def transform_text(text):
     transformed_paragraphs = [transform_paragraph(paragraph) for paragraph in paragraphs]
     return '\n'.join(transformed_paragraphs)
 
-# function to humanize text
+# Humanize the AI-detected text using the SRDdev Paraphrase model
 def humanize_text(AI_text):
-    """Humanizes the provided AI text using the fine-tuned model."""
-    response = client.chat.completions.create(
-        model=finetuned_model,
-        temperature = 0.87,
-        messages=[
-            {"role": "system", "content": """
-            You are a text humanizer.
-            You humanize AI generated text.
-            The text must appear like humanly written.
-            THE INPUT AND THE OUTPUT HEADINGS MUST BE SAME. NO HEADING SHOULD BE MISSED.
-            NAMES LIKE NOVEL NAME SHOULD REMAIN INTACT WITHOUT ANY CHANGE.
-            THE INPUT AND THE OUTPUT SHOULD HAVE SAME WORD COUNT.
-            THE OUTPUT SENTENCES MUST NOT BE SIMPLE. THEY SHOULD BE COMPOUND, COMPLEX, OR COMPOUND COMPLEX.
-            ABOVE ALL, THE GRAMMAR AND THE SENSE OF THE SENTENCES MUST BE TOP NOTCH - DO NOT COMPROMISE ON THAT."""},
-            {"role": "system", "content": "YOU ARE TEXT HUMANIZER BUT YOU DO NOT REDUCE THE LENGTH OF THE SENTENCES. YOUR OUTPUT SENTENCES ARE OF EXACTLY THE SAME LENGTH AS THE INPUT"},
-            {"role": "user", "content": f"THE LANGUAGE OF THE INPUT AND THE OUTPUT MUST BE SAME. THE SENTENCES SHOULD NOT BE SHORT LENGTH - THEY SHOULD BE SAME AS IN THE INPUT. ALSO THE PARAGRAPHS SHOULD NOT BE SHORT EITHER - PARAGRAPHS MUST HAVE THE SAME LENGTH"},
-            {"role": "user", "content": f"THE GRAMMAR AND THE QUALITY OF THE SENTENCES MUST BE TOP NOTCH - EASY TO UNDERSTAND AND NO GRAMMATICAL ERRORS."},
-            {"role": "user", "content": "Use as many conjunctions and punctuations to make the sentence long. COMPOUND, COMPLEX, OR COMPOUND COMPLEX sentences are required"},
-            {"role": "user", "content": f"Humanize the text. Keep the output format i.e. the bullets and the headings as it is. THE GRAMMAR MUST BE TOP NOTCH WITH NO ERRORS AND EASY TO UNDERSTAND!!!!. \nTEXT: {AI_text}"}
-        ]
-    )
-
-    return response.choices[0].message.content.strip()
-
-
-
+    paragraphs = AI_text.split("\n")
+    paraphrased_paragraphs = []
+    for paragraph in paragraphs:
+        if paragraph.strip():
+            inputs = paraphrase_tokenizer(paragraph, return_tensors="pt", max_length=512, truncation=True)
+            paraphrased_ids = paraphrase_model.generate(
+                inputs['input_ids'],
+                max_length=inputs['input_ids'].shape[-1] + 20,  # Slightly more than the original input length
+                num_beams=4,
+                early_stopping=True,
+                length_penalty=1.0,
+                no_repeat_ngram_size=3,
+            )
+            paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
+            paraphrased_paragraphs.append(paraphrased_text)
+    return "\n\n".join(paraphrased_paragraphs)
+
+# Main function to handle the overall process
 def main_function(AI_text):
+    ai_probabilities = [detect_ai_generated(sentence) for sentence in nltk.sent_tokenize(AI_text)]
+    ai_generated_percentage = sum([1 for prob in ai_probabilities if prob > 0.5]) / len(ai_probabilities) * 100
+
+    # Transform AI text to make it more human-like
     humanized_text = humanize_text(AI_text)
-    # humanized_text= transform_text(humanized_text)
-    return humanized_text
-
-
-
+    humanized_text = transform_text(humanized_text)  # Add randomness to simulate human errors
+
+    return f"AI-Generated Content: {ai_generated_percentage:.2f}%\n\nHumanized Text:\n{humanized_text}"
 
 # Gradio interface definition
 interface = gr.Interface(
-    fn=main_function,
-    inputs="textbox",
-    outputs="textbox",
-    title="AI Text Humanizer",
-    description="Enter AI-generated text and get a human-written version. This space is availabe for limited time only so contact farhan.[email protected] to put this application in production.",
+    fn=main_function,
+    inputs="textbox",
+    outputs="textbox",
+    title="AI Text Humanizer",
+    description="Enter AI-generated text and get a human-written version. This space uses models from Hugging Face directly."
 )
 
-
 # Launch the Gradio app
-interface.launch(debug = True)
-
-
-
-
-
-
-
-# import gradio as gr
-
-# # Function to handle text submission
-# def contact_info(text):
-#     return "Contact [email protected] for Humanizer Application service"
-
-# # Gradio interface definition
-# interface = gr.Interface(
-#     fn=contact_info,
-#     inputs="textbox",
-#     outputs="text",
-#     title="AI TEXT HUMANIZER",
-#     description="Enter AI text and get its humanizer equivalent"
-# )
-
-# # Launch the Gradio app
-# if __name__ == "__main__":
-#     interface.launch()
+interface.launch(debug=True)
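A quick way to sanity-check this revision: with the app from this commit running locally (`python app.py` serves the Gradio interface on the default port), the single `gr.Interface` endpoint can be driven programmatically. Below is a minimal sketch using `gradio_client`; the local URL and the sample text are illustrative assumptions, not part of the commit.

# Sketch: call the updated humanizer endpoint programmatically.
# Assumes the app from this commit is serving at Gradio's default local
# address; for a hosted Space, pass its "user/space-name" id instead.
from gradio_client import Client

client = Client("http://127.0.0.1:7860")  # assumed local URL

sample = (
    "Artificial intelligence has transformed many industries. "  # made-up input
    "It enables faster decision making and reduces manual effort."
)

# A plain gr.Interface exposes one endpoint, conventionally /predict.
result = client.predict(sample, api_name="/predict")
print(result)  # "AI-Generated Content: NN.NN%" followed by the humanized text

Since main_function returns a single formatted string, the detection percentage and the rewritten text arrive together; splitting them into separate outputs would be a natural follow-up change.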