Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,13 +2,13 @@ import os
|
|
2 |
import gradio as gr
|
3 |
from transformers import pipeline
|
4 |
import spacy
|
5 |
-
import subprocess
|
6 |
import nltk
|
7 |
from nltk.corpus import wordnet
|
8 |
from spellchecker import SpellChecker
|
9 |
import re
|
10 |
import inflect
|
11 |
|
|
|
12 |
try:
|
13 |
nlp = spacy.load("en_core_web_sm")
|
14 |
except OSError:
|
@@ -16,8 +16,6 @@ except OSError:
|
|
16 |
spacy.cli.download("en_core_web_sm")
|
17 |
nlp = spacy.load("en_core_web_sm")
|
18 |
|
19 |
-
|
20 |
-
|
21 |
# Initialize the English text classification pipeline for AI detection
|
22 |
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
|
23 |
|
@@ -31,9 +29,6 @@ inflect_engine = inflect.engine()
|
|
31 |
nltk.download('wordnet')
|
32 |
nltk.download('omw-1.4')
|
33 |
|
34 |
-
# Load the SpaCy model
|
35 |
-
nlp = spacy.load("en_core_web_sm")
|
36 |
-
|
37 |
# Function to predict the label and score for English text (AI Detection)
|
38 |
def predict_en(text):
|
39 |
res = pipeline_en(text)[0]
|
@@ -172,22 +167,67 @@ def ensure_subject_verb_agreement(text):
|
|
172 |
corrected_text.append(token.text)
|
173 |
return ' '.join(corrected_text)
|
174 |
|
175 |
-
#
|
176 |
-
def
|
177 |
words = text.split()
|
178 |
corrected_words = []
|
179 |
for word in words:
|
180 |
-
|
181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
return ' '.join(corrected_words)
|
183 |
|
184 |
-
#
|
185 |
-
def
|
|
|
186 |
text = re.sub(r'\s+([?.!,";:])', r'\1', text)
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
return text
|
189 |
|
190 |
-
# Function to
|
191 |
def handle_possessives(text):
|
192 |
text = re.sub(r"\b(\w+)'s\b", r"\1's", text)
|
193 |
return text
|
@@ -231,53 +271,40 @@ def rephrase_with_synonyms(text):
|
|
231 |
|
232 |
return ' '.join(rephrased_text)
|
233 |
|
234 |
-
# Function to paraphrase and correct grammar with enhanced accuracy
|
235 |
-
def paraphrase_and_correct(text):
|
236 |
-
# Remove meaningless or redundant words first
|
237 |
-
cleaned_text = remove_redundant_words(text)
|
238 |
-
|
239 |
-
# Capitalize sentences and proper nouns
|
240 |
-
cleaned_text = capitalize_sentences_and_nouns(cleaned_text)
|
241 |
-
|
242 |
-
# Correct tense errors
|
243 |
-
cleaned_text = correct_tense_errors(cleaned_text)
|
244 |
-
|
245 |
-
# Correct singular/plural errors
|
246 |
-
cleaned_text = correct_singular_plural_errors(cleaned_text)
|
247 |
-
|
248 |
-
# Correct article errors
|
249 |
-
cleaned_text = correct_article_errors(cleaned_text)
|
250 |
-
|
251 |
-
# Correct spelling
|
252 |
-
cleaned_text = correct_spelling(cleaned_text)
|
253 |
-
|
254 |
-
# Correct punctuation issues
|
255 |
-
cleaned_text = correct_punctuation(cleaned_text)
|
256 |
-
|
257 |
-
# Handle possessives
|
258 |
-
cleaned_text = handle_possessives(cleaned_text)
|
259 |
-
|
260 |
-
# Replace words with synonyms
|
261 |
-
cleaned_text = rephrase_with_synonyms(cleaned_text)
|
262 |
-
|
263 |
-
# Correct double negatives
|
264 |
-
cleaned_text = correct_double_negatives(cleaned_text)
|
265 |
-
|
266 |
-
# Ensure subject-verb agreement
|
267 |
-
cleaned_text = ensure_subject_verb_agreement(cleaned_text)
|
268 |
-
|
269 |
-
return cleaned_text
|
270 |
-
|
271 |
# Function to detect AI-generated content
|
272 |
def detect_ai(text):
|
273 |
label, score = predict_en(text)
|
274 |
return label, score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
def gradio_interface(text):
|
276 |
label, score = detect_ai(text)
|
277 |
corrected_text = paraphrase_and_correct(text)
|
278 |
return {label: score}, corrected_text
|
279 |
|
280 |
-
#
|
281 |
iface = gr.Interface(
|
282 |
fn=gradio_interface,
|
283 |
inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
|
@@ -290,4 +317,4 @@ iface = gr.Interface(
|
|
290 |
)
|
291 |
|
292 |
# Launch the app
|
293 |
-
iface.launch()
|
|
|
2 |
import gradio as gr
|
3 |
from transformers import pipeline
|
4 |
import spacy
|
|
|
5 |
import nltk
|
6 |
from nltk.corpus import wordnet
|
7 |
from spellchecker import SpellChecker
|
8 |
import re
|
9 |
import inflect
|
10 |
|
11 |
+
# Initialize components
|
12 |
try:
|
13 |
nlp = spacy.load("en_core_web_sm")
|
14 |
except OSError:
|
|
|
16 |
spacy.cli.download("en_core_web_sm")
|
17 |
nlp = spacy.load("en_core_web_sm")
|
18 |
|
|
|
|
|
19 |
# Initialize the English text classification pipeline for AI detection
|
20 |
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
|
21 |
|
|
|
29 |
nltk.download('wordnet')
|
30 |
nltk.download('omw-1.4')
|
31 |
|
|
|
|
|
|
|
32 |
# Function to predict the label and score for English text (AI Detection)
|
33 |
def predict_en(text):
|
34 |
res = pipeline_en(text)[0]
|
|
|
167 |
corrected_text.append(token.text)
|
168 |
return ' '.join(corrected_text)
|
169 |
|
170 |
+
# Enhance the spell checker function
def enhanced_spell_check(text):
    """Spell-correct every whitespace-separated word in *text*.

    Words containing underscores (e.g. 'animate_being') are split on '_',
    each part is corrected independently, and the parts are re-joined with
    '_'. Any word for which the checker has no suggestion is kept as-is.

    Relies on the module-level `spell` (SpellChecker) instance.
    """
    words = text.split()
    corrected_words = []
    for word in words:
        if '_' in word:  # Handle cases like 'animate_being'
            sub_words = word.split('_')
            # Bug fix: spell.correction() returns None for unknown words; the
            # original joined the raw results, so '_'.join crashed with
            # TypeError on any unrecognized sub-word. Fall back to the
            # original sub-word, mirroring the else-branch below.
            corrected_sub_words = [spell.correction(w) or w for w in sub_words]
            corrected_words.append('_'.join(corrected_sub_words))
        else:
            corrected_word = spell.correction(word)
            corrected_words.append(corrected_word if corrected_word else word)
    return ' '.join(corrected_words)
|
183 |
+
|
184 |
+
# Function to correct common semantic errors
def correct_semantic_errors(text):
    """Replace known mistranslations/typos with their intended words.

    Lookup is case-insensitive (keys are matched against word.lower()) and
    purely whitespace-token based, so a word carrying punctuation ("dirt,")
    will not match. NOTE(review): replacements are emitted lowercase even
    when the source word was capitalized — confirm acceptable.
    """
    semantic_corrections = {
        "animate_being": "animal",
        "little": "smallest",
        "big": "largest",
        "mammalian": "mammals",
        "universe": "world",
        "manner": "ways",
        "continue": "preserve",
        "dirt": "soil",
        "wellness": "health",
        "modulate": "regulate",
        "clime": "climate",
        "function": "role",
        "keeping": "maintaining",
        "lend": "contribute",
        "better": "improve",
        # Bug fix: the original table contained "is": "s", which rewrote the
        # copula "is" to the meaningless token "s" in every sentence passing
        # through this stage. The entry is removed so "is" passes unchanged.
        "wite": "write",
        "alos": "also",
        "ads": "as",
        "dictuionatr": "dictionary",
        "wors": "words"
    }

    words = text.split()
    corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
    return ' '.join(corrected_words)
|
213 |
|
214 |
+
# Enhance the punctuation correction function
def enhance_punctuation(text):
    """Normalize spacing and capitalization around punctuation.

    Steps: strip spaces before punctuation, ensure a space follows
    punctuation, normalize spacing around double quotes, and capitalize the
    first letter after sentence-ending punctuation.
    """
    # Remove extra spaces before punctuation
    text = re.sub(r'\s+([?.!,";:])', r'\1', text)

    # Add space after punctuation if it's missing. Bug fix: the original
    # pattern ([?.!,";:])(\S) also split numbers — "3.14" -> "3. 14" and
    # "1,000" -> "1, 000". Never insert a space before a digit.
    text = re.sub(r'([?.!,";:])(?=[^\s\d])', r'\1 ', text)

    # Correct spacing for quotes.
    # NOTE(review): this treats every '"' as an opening quote and emits
    # '" ' (quote + space), so closing quotes also gain a trailing space —
    # behavior preserved from the original; confirm intended.
    text = re.sub(r'\s*"\s*', '" ', text).strip()

    # Ensure proper capitalization after sentence-ending punctuation
    text = re.sub(r'([.!?])\s*([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)

    return text
|
229 |
|
230 |
+
# Function to handle possessives
def handle_possessives(text):
    """Normalize possessive forms to use a straight ASCII apostrophe.

    Bug fix: the original substitution re.sub(r"\b(\w+)'s\b", r"\1's", text)
    replaced every possessive with itself — a provable no-op. The plausible
    intent is normalization, so typographic apostrophes (U+2019) in
    possessives are now converted to straight ones; ASCII-only input is
    returned unchanged, exactly as before.
    """
    text = re.sub(r"\b(\w+)\u2019s\b", r"\1's", text)
    return text
|
|
|
271 |
|
272 |
return ' '.join(rephrased_text)
|
273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
# Function to detect AI-generated content
def detect_ai(text):
    """Classify *text* with the English AI detector.

    Thin wrapper around predict_en(); returns its (label, score) pair.
    """
    detector_label, detector_score = predict_en(text)
    return detector_label, detector_score
|
278 |
+
|
279 |
+
# Enhance the paraphrase_and_correct function
def paraphrase_and_correct(text):
    """Run *text* through every correction stage, in order, and return it.

    The order is significant: spell checking and semantic substitutions run
    first so the later grammar/punctuation passes see cleaned-up words.
    """
    correction_stages = (
        enhanced_spell_check,            # enhanced spell checking
        correct_semantic_errors,         # semantic word substitutions
        remove_redundant_words,          # existing correction passes below
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        enhance_punctuation,
        handle_possessives,
        rephrase_with_synonyms,
        correct_double_negatives,
        ensure_subject_verb_agreement,
    )
    for stage in correction_stages:
        text = stage(text)
    return text
|
300 |
+
|
301 |
+
# Gradio interface setup
def gradio_interface(text):
    """Gradio callback: return ({label: score}, corrected_text)."""
    detection_label, detection_score = detect_ai(text)
    corrected = paraphrase_and_correct(text)
    return {detection_label: detection_score}, corrected
|
306 |
|
307 |
+
# Create Gradio interface
|
308 |
iface = gr.Interface(
|
309 |
fn=gradio_interface,
|
310 |
inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
|
|
|
317 |
)
|
318 |
|
319 |
# Launch the app
|
320 |
+
iface.launch()
|