huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 2, 2024

Commit

a3485f7

verified ·

1 Parent(s): 7a350d6

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -23

app.py CHANGED Viewed

@@ -6,11 +6,19 @@ import subprocess
 import nltk
 from nltk.corpus import wordnet
 from gensim import downloader as api
-from textblob import TextBlob  # Import TextBlob for grammar correction
 # Ensure necessary NLTK data is downloaded
 nltk.download('wordnet')
 nltk.download('omw-1.4')
 # Ensure the spaCy model is installed
 try:
@@ -29,21 +37,13 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
-# Function to correct grammar using TextBlob
-def correct_grammar_with_textblob(text):
-    blob = TextBlob(text)
-    corrected_text = str(blob.correct())
     return corrected_text
-# AI detection function using DistilBERT
-def detect_ai_generated(text):
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
-    with torch.no_grad():
-        outputs = model(**inputs)
-        probabilities = torch.softmax(outputs.logits, dim=1)
-    ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
-    return f"AI-Generated Content Probability: {ai_probability:.2f}%"
 # Function to get synonyms using NLTK WordNet
 def get_synonyms_nltk(word, pos):
     synsets = wordnet.synsets(word, pos=pos)
@@ -52,13 +52,12 @@ def get_synonyms_nltk(word, pos):
         return [lemma.name() for lemma in lemmas]
     return []
-# Paraphrasing function using spaCy and NLTK with TextBlob grammar correction
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
     paraphrased_words = []
     for token in doc:
-        # Map spaCy POS tags to WordNet POS tags
         pos = None
         if token.pos_ in {"NOUN"}:
             pos = wordnet.NOUN
@@ -71,19 +70,38 @@ def paraphrase_with_spacy_nltk(text):
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
-        # Replace with a synonym only if it makes sense
         if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
-    # Join the words back into a sentence
     paraphrased_sentence = ' '.join(paraphrased_words)
-    # Correct the grammar of the paraphrased sentence using TextBlob
-    corrected_sentence = correct_grammar_with_textblob(paraphrased_sentence)
-    return corrected_sentence
 # Gradio interface definition
 with gr.Blocks() as interface:
@@ -91,12 +109,12 @@ with gr.Blocks() as interface:
         with gr.Column():
             text_input = gr.Textbox(lines=5, label="Input Text")
             detect_button = gr.Button("AI Detection")
-            paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK (Grammar Corrected with TextBlob)")
         with gr.Column():
             output_text = gr.Textbox(label="Output")
     detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
-    paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
 # Launch the Gradio app
 interface.launch(debug=False)

 import nltk
 from nltk.corpus import wordnet
 from gensim import downloader as api
+import language_tool_python
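+# Install a Java runtime (OpenJDK 11 JRE) via apt-get; presumably required by the LanguageTool backend used by language_tool_python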
+def install_java():
+    subprocess.run(["apt-get", "update"])
+    subprocess.run(["apt-get", "install", "-y", "openjdk-11-jre"])
+install_java()
 # Ensure necessary NLTK data is downloaded
 nltk.download('wordnet')
 nltk.download('omw-1.4')
+nltk.download('punkt')  # Download the Punkt tokenizer for sentence tokenization
 # Ensure the spaCy model is installed
 try:
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
+# Function to correct grammar using LanguageTool
+def correct_grammar_with_languagetool(text):
+    tool = language_tool_python.LanguageTool('en-US')
+    matches = tool.check(text)
+    corrected_text = language_tool_python.utils.correct(text, matches)
     return corrected_text
 # Function to get synonyms using NLTK WordNet
 def get_synonyms_nltk(word, pos):
     synsets = wordnet.synsets(word, pos=pos)
         return [lemma.name() for lemma in lemmas]
     return []
+# Paraphrasing function using spaCy and NLTK
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
     paraphrased_words = []
     for token in doc:
         pos = None
         if token.pos_ in {"NOUN"}:
             pos = wordnet.NOUN
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
         if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
     paraphrased_sentence = ' '.join(paraphrased_words)
+    return paraphrased_sentence
+# Sentence structuring using NLTK
+def structure_sentences(text):
+    sentences = nltk.sent_tokenize(text)  # Tokenize text into sentences
+    structured_sentences = []
+    for sentence in sentences:
+        # Placeholder: no structuring rules are applied yet, so each sentence is kept unchanged. Add any structuring logic here.
+        structured_sentences.append(sentence)
+    structured_text = ' '.join(structured_sentences)
+    return structured_text
+# Combined function: Paraphrase -> Structure -> Grammar Check
+def humanize_text(text):
+    # Step 1: Paraphrase
+    paraphrased_text = paraphrase_with_spacy_nltk(text)
+    # Step 2: Structure sentences
+    structured_text = structure_sentences(paraphrased_text)
+    # Step 3: Apply grammar correction
+    final_text = correct_grammar_with_languagetool(structured_text)
+    return final_text
 # Gradio interface definition
 with gr.Blocks() as interface:
         with gr.Column():
             text_input = gr.Textbox(lines=5, label="Input Text")
             detect_button = gr.Button("AI Detection")
+            humanize_button = gr.Button("Humanize Text")
         with gr.Column():
             output_text = gr.Textbox(label="Output")
     detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
+    humanize_button.click(humanize_text, inputs=text_input, outputs=output_text)
 # Launch the Gradio app
 interface.launch(debug=False)