Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import spacy
|
|
5 |
import subprocess
|
6 |
import nltk
|
7 |
from nltk.corpus import wordnet
|
|
|
8 |
|
9 |
# Initialize the English text classification pipeline for AI detection
|
10 |
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
|
@@ -25,6 +26,9 @@ except OSError:
|
|
25 |
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
|
26 |
nlp = spacy.load("en_core_web_sm")
|
27 |
|
|
|
|
|
|
|
28 |
# Function to get synonyms using NLTK WordNet (Humanifier)
|
29 |
def get_synonyms_nltk(word, pos):
|
30 |
synsets = wordnet.synsets(word, pos=pos)
|
@@ -51,20 +55,6 @@ def capitalize_sentences_and_nouns(text):
|
|
51 |
|
52 |
return ' '.join(corrected_text)
|
53 |
|
54 |
-
# Function to correct tense errors in a sentence (Tense Correction)
|
55 |
-
def correct_tense_errors(text):
    """Rewrite auxiliary verbs in *text* as their WordNet verb base form.

    The text is parsed with the module-level spaCy pipeline ``nlp``. A token
    is rewritten only when spaCy tags it as a ``VERB`` whose dependency label
    is ``aux`` or ``auxpass``; every other token is emitted unchanged.

    Args:
        text: The sentence or passage to process.

    Returns:
        The processed tokens joined by single spaces. Because the result is
        rebuilt from tokens, the original spacing is not preserved.
    """
    auxiliary_labels = {"aux", "auxpass"}
    pieces = [
        # Fall back to the surface form when morphy yields nothing.
        (wordnet.morphy(tok.text, wordnet.VERB) or tok.text)
        if tok.pos_ == "VERB" and tok.dep_ in auxiliary_labels
        else tok.text
        for tok in nlp(text)
    ]
    return ' '.join(pieces)
|
67 |
-
|
68 |
# Function to correct singular/plural errors (Singular/Plural Correction)
|
69 |
def correct_singular_plural_errors(text):
|
70 |
doc = nlp(text)
|
@@ -72,15 +62,12 @@ def correct_singular_plural_errors(text):
|
|
72 |
|
73 |
for token in doc:
|
74 |
if token.pos_ == "NOUN":
|
75 |
-
# Check if the noun is singular or plural
|
76 |
if token.tag_ == "NN": # Singular noun
|
77 |
-
# Look for determiners like "many", "several", "few" to correct to plural
|
78 |
if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
|
79 |
corrected_text.append(token.lemma_ + 's')
|
80 |
else:
|
81 |
corrected_text.append(token.text)
|
82 |
elif token.tag_ == "NNS": # Plural noun
|
83 |
-
# Look for determiners like "a", "one" to correct to singular
|
84 |
if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
|
85 |
corrected_text.append(token.lemma_)
|
86 |
else:
|
@@ -90,6 +77,18 @@ def correct_singular_plural_errors(text):
|
|
90 |
|
91 |
return ' '.join(corrected_text)
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
# Function to check and correct article errors
|
94 |
def correct_article_errors(text):
|
95 |
doc = nlp(text)
|
@@ -123,12 +122,11 @@ def replace_with_synonym(token):
|
|
123 |
|
124 |
if synonyms:
|
125 |
synonym = synonyms[0]
|
126 |
-
|
127 |
-
if token.tag_ == "VBG": # Present participle (e.g., running)
|
128 |
synonym = synonym + 'ing'
|
129 |
-
elif token.tag_ == "VBD" or token.tag_ == "VBN":
|
130 |
synonym = synonym + 'ed'
|
131 |
-
elif token.tag_ == "VBZ":
|
132 |
synonym = synonym + 's'
|
133 |
return synonym
|
134 |
return token.text
|
@@ -139,7 +137,6 @@ def correct_double_negatives(text):
|
|
139 |
corrected_text = []
|
140 |
for token in doc:
|
141 |
if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
|
142 |
-
# Replace the double negative with a positive statement
|
143 |
corrected_text.append("always")
|
144 |
else:
|
145 |
corrected_text.append(token.text)
|
@@ -151,15 +148,20 @@ def ensure_subject_verb_agreement(text):
|
|
151 |
corrected_text = []
|
152 |
for token in doc:
|
153 |
if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
|
154 |
-
|
155 |
-
if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
|
156 |
corrected_text.append(token.head.lemma_ + "s")
|
157 |
-
elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
|
158 |
corrected_text.append(token.head.lemma_)
|
159 |
corrected_text.append(token.text)
|
160 |
return ' '.join(corrected_text)
|
161 |
|
162 |
-
# Function to
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
def paraphrase_and_correct(text):
|
164 |
# Capitalize first to ensure proper noun capitalization
|
165 |
paraphrased_text = capitalize_sentences_and_nouns(text)
|
@@ -180,7 +182,10 @@ def paraphrase_and_correct(text):
|
|
180 |
else:
|
181 |
final_text.append(token.text)
|
182 |
|
183 |
-
|
|
|
|
|
|
|
184 |
|
185 |
# Gradio app setup with two tabs
|
186 |
with gr.Blocks() as demo:
|
|
|
5 |
import subprocess
|
6 |
import nltk
|
7 |
from nltk.corpus import wordnet
|
8 |
+
from spellchecker import SpellChecker # Import SpellChecker for spelling correction
|
9 |
|
10 |
# Initialize the English text classification pipeline for AI detection
|
11 |
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
|
|
|
26 |
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
|
27 |
nlp = spacy.load("en_core_web_sm")
|
28 |
|
29 |
+
# Initialize SpellChecker
|
30 |
+
spell = SpellChecker()
|
31 |
+
|
32 |
# Function to get synonyms using NLTK WordNet (Humanifier)
|
33 |
def get_synonyms_nltk(word, pos):
|
34 |
synsets = wordnet.synsets(word, pos=pos)
|
|
|
55 |
|
56 |
return ' '.join(corrected_text)
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# Function to correct singular/plural errors (Singular/Plural Correction)
|
59 |
def correct_singular_plural_errors(text):
|
60 |
doc = nlp(text)
|
|
|
62 |
|
63 |
for token in doc:
|
64 |
if token.pos_ == "NOUN":
|
|
|
65 |
if token.tag_ == "NN": # Singular noun
|
|
|
66 |
if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
|
67 |
corrected_text.append(token.lemma_ + 's')
|
68 |
else:
|
69 |
corrected_text.append(token.text)
|
70 |
elif token.tag_ == "NNS": # Plural noun
|
|
|
71 |
if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
|
72 |
corrected_text.append(token.lemma_)
|
73 |
else:
|
|
|
77 |
|
78 |
return ' '.join(corrected_text)
|
79 |
|
80 |
+
# Function to correct tense errors in a sentence (Tense Correction)
|
81 |
+
def correct_tense_errors(text):
    """Replace auxiliary verbs in *text* with their WordNet base form.

    Each token produced by the module-level spaCy pipeline ``nlp`` is
    inspected: tokens tagged ``VERB`` with a dependency label of ``aux`` or
    ``auxpass`` are looked up with ``wordnet.morphy``; all other tokens pass
    through untouched.

    Args:
        text: The sentence or passage to process.

    Returns:
        A string made of the resulting tokens separated by single spaces
        (the original whitespace layout is not kept).
    """
    auxiliary_deps = ("aux", "auxpass")

    def base_form(token):
        # Resolve one token to the string that should appear in the output.
        surface = token.text
        if token.pos_ != "VERB" or token.dep_ not in auxiliary_deps:
            return surface
        # Keep the surface form when morphy has no base form to offer.
        return wordnet.morphy(surface, wordnet.VERB) or surface

    return ' '.join(base_form(token) for token in nlp(text))
|
91 |
+
|
92 |
# Function to check and correct article errors
|
93 |
def correct_article_errors(text):
|
94 |
doc = nlp(text)
|
|
|
122 |
|
123 |
if synonyms:
|
124 |
synonym = synonyms[0]
|
125 |
+
if token.tag_ == "VBG":
|
|
|
126 |
synonym = synonym + 'ing'
|
127 |
+
elif token.tag_ == "VBD" or token.tag_ == "VBN":
|
128 |
synonym = synonym + 'ed'
|
129 |
+
elif token.tag_ == "VBZ":
|
130 |
synonym = synonym + 's'
|
131 |
return synonym
|
132 |
return token.text
|
|
|
137 |
corrected_text = []
|
138 |
for token in doc:
|
139 |
if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
|
|
|
140 |
corrected_text.append("always")
|
141 |
else:
|
142 |
corrected_text.append(token.text)
|
|
|
148 |
corrected_text = []
|
149 |
for token in doc:
|
150 |
if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
|
151 |
+
if token.tag_ == "NN" and token.head.tag_ != "VBZ":
|
|
|
152 |
corrected_text.append(token.head.lemma_ + "s")
|
153 |
+
elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
|
154 |
corrected_text.append(token.head.lemma_)
|
155 |
corrected_text.append(token.text)
|
156 |
return ' '.join(corrected_text)
|
157 |
|
158 |
+
# Function to correct spelling errors
|
159 |
+
def correct_spelling(text):
    """Return *text* with misspelled words replaced by the best suggestion.

    The text is split on whitespace and each alphabetic word is passed to
    the module-level ``spell`` checker. The words are re-joined with single
    spaces.

    Args:
        text: The text to spell-check.

    Returns:
        The corrected text. A word is kept as-is when it is not purely
        alphabetic or when the checker offers no correction.
    """
    corrected_words = []
    for word in text.split():
        # Tokens that are not purely alphabetic (standalone punctuation,
        # numbers, contraction pieces such as "n't") are not dictionary
        # words, so they are passed through unchanged.
        suggestion = spell.correction(word) if word.isalpha() else None
        # correction() gives one best string; candidates() gives a set of
        # strings, which str.join cannot accept. Fall back to the original
        # word when no suggestion is returned.
        corrected_words.append(suggestion or word)
    return ' '.join(corrected_words)
|
163 |
+
|
164 |
+
# Function to paraphrase, correct grammar, and fix spelling errors
|
165 |
def paraphrase_and_correct(text):
|
166 |
# Capitalize first to ensure proper noun capitalization
|
167 |
paraphrased_text = capitalize_sentences_and_nouns(text)
|
|
|
182 |
else:
|
183 |
final_text.append(token.text)
|
184 |
|
185 |
+
# Correct spelling errors
|
186 |
+
final_text = correct_spelling(' '.join(final_text))
|
187 |
+
|
188 |
+
return final_text
|
189 |
|
190 |
# Gradio app setup with two tabs
|
191 |
with gr.Blocks() as demo:
|