huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 9, 2024

Commit

73bd89d

verified ·

1 Parent(s): b03c66d

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -60

app.py CHANGED Viewed

@@ -5,16 +5,10 @@ import spacy
 import subprocess
 import nltk
 from nltk.corpus import wordnet
-from pattern.en import conjugate, tenses
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
-# Function to predict the label and score for English text (AI Detection)
-def predict_en(text):
-    res = pipeline_en(text)[0]
-    return res['label'], res['score']
 # Ensure necessary NLTK data is downloaded for Humanifier
 nltk.download('wordnet')
 nltk.download('omw-1.4')
@@ -52,59 +46,54 @@ def capitalize_sentences_and_nouns(text):
     return ' '.join(corrected_text)
-# Function to check and correct conjunction errors with 'because' and 'but'
-def check_conjunction_errors(text):
-    # Replace misplaced 'because' and 'but'
-    text = text.replace("because, ", "because ")
-    text = text.replace("but, ", "but ")
-    return text
-# Function to check and correct tense consistency in sentences using Pattern.en
-def check_tense_consistency(text):
     doc = nlp(text)
-    corrected_sentences = []
-    for sent in doc.sents:
-        verbs = [token for token in sent if token.pos_ == 'VERB']
-        if verbs:
-            # Find the most common tense in the sentence
-            common_tense = None
-            for verb in verbs:
-                verb_tense = tenses(verb.text)
-                if verb_tense:
-                    common_tense = verb_tense[0][0]
-                    break
-            # Conjugate all verbs to the common tense if there's inconsistency
-            corrected_sentence = []
-            for token in sent:
-                if token.pos_ == 'VERB' and common_tense:
-                    corrected_verb = conjugate(token.text, tense=common_tense)
-                    corrected_sentence.append(corrected_verb)
-                else:
-                    corrected_sentence.append(token.text)
-            corrected_sentences.append(' '.join(corrected_sentence))
         else:
-            corrected_sentences.append(sent.text)
-    return ' '.join(corrected_sentences)
-# Function to check and correct article usage ('a', 'an', 'the')
-def check_article_usage(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
-        if token.text.lower() in ['a', 'an', 'the']:
-            if token.text.lower() == 'a' and token.head.pos_ in ['NOUN', 'ADJ'] and token.head.text[0] in 'aeiou':
-                corrected_text.append('an')
-            elif token.text.lower() == 'an' and token.head.pos_ in ['NOUN', 'ADJ'] and token.head.text[0] not in 'aeiou':
-                corrected_text.append('a')
             else:
                 corrected_text.append(token.text)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 # Paraphrasing function using SpaCy and NLTK (Humanifier)
@@ -140,22 +129,18 @@ def paraphrase_with_spacy_nltk(text):
     return corrected_text
-# Combined function: Paraphrase -> Grammar Correction -> Capitalization -> Tense Consistency (Humanifier)
 def paraphrase_and_correct(text):
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
-    # Step 2: Check and correct conjunction errors
-    corrected_conjunctions = check_conjunction_errors(paraphrased_text)
-    # Step 3: Check and correct article usage
-    corrected_articles = check_article_usage(corrected_conjunctions)
-    # Step 4: Capitalize sentences and proper nouns
-    capitalized_text = capitalize_sentences_and_nouns(corrected_articles)
-    # Step 5: Check and correct tense consistency
-    final_text = check_tense_consistency(capitalized_text)
     return final_text
@@ -175,7 +160,7 @@ with gr.Blocks() as demo:
         paraphrase_button = gr.Button("Paraphrase & Correct")
         output_text = gr.Textbox(label="Paraphrased Text")
-        # Connect the paraphrasing and correction function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 # Launch the app with the remaining functionalities

 import subprocess
 import nltk
 from nltk.corpus import wordnet
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 # Ensure necessary NLTK data is downloaded for Humanifier
 nltk.download('wordnet')
 nltk.download('omw-1.4')
     return ' '.join(corrected_text)
+# Function to correct tense errors in a sentence (Tense Correction)
+def correct_tense_errors(text):
     doc = nlp(text)
+    corrected_text = []
+    for token in doc:
+        # Check for tense correction based on modal verbs
+        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
+            # Replace with appropriate verb form
+            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
+            corrected_text.append(lemma)
         else:
+            corrected_text.append(token.text)
+    return ' '.join(corrected_text)
+# Function to correct singular/plural errors (Singular/Plural Correction)
+def correct_singular_plural_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
+        if token.pos_ == "NOUN" and token.tag_ == "NN":
+            if token.dep_ == "nsubj" and any(t.dep_ == "nummod" for t in token.head.children):
+                corrected_text.append(token.text + "s")
+            else:
+                corrected_text.append(token.text)
+        elif token.pos_ == "NOUN" and token.tag_ == "NNS":
+            if token.dep_ == "nsubj" and not any(t.dep_ == "nummod" for t in token.head.children):
+                corrected_text.append(token.lemma_)
             else:
                 corrected_text.append(token.text)
         else:
             corrected_text.append(token.text)
+    return ' '.join(corrected_text)
+# Function to check and correct article errors
+def correct_article_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+    for token in doc:
+        if token.text in ['a', 'an']:
+            next_token = token.nbor(1)
+            if token.text == "a" and next_token.text[0].lower() in "aeiou":
+                corrected_text.append("an")
+            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
+                corrected_text.append("a")
+            else:
+                corrected_text.append(token.text)
+        else:
+            corrected_text.append(token.text)
     return ' '.join(corrected_text)
 # Paraphrasing function using SpaCy and NLTK (Humanifier)
     return corrected_text
+# Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
 def paraphrase_and_correct(text):
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
+    # Step 2: Apply grammatical corrections
+    corrected_text = correct_article_errors(paraphrased_text)
+    corrected_text = correct_tense_errors(corrected_text)
+    corrected_text = correct_singular_plural_errors(corrected_text)
+    # Step 3: Capitalize sentences and proper nouns
+    final_text = capitalize_sentences_and_nouns(corrected_text)
     return final_text
         paraphrase_button = gr.Button("Paraphrase & Correct")
         output_text = gr.Textbox(label="Paraphrased Text")
+        # Connect the paraphrasing function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 # Launch the app with the remaining functionalities