sashtech committed on
Commit
353216c
·
verified ·
1 Parent(s): a2b6ad0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -53
app.py CHANGED
@@ -2,13 +2,13 @@ import os
2
  import gradio as gr
3
  from transformers import pipeline
4
  import spacy
5
- import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
  import re
10
  import inflect
11
 
 
12
  try:
13
  nlp = spacy.load("en_core_web_sm")
14
  except OSError:
@@ -16,8 +16,6 @@ except OSError:
16
  spacy.cli.download("en_core_web_sm")
17
  nlp = spacy.load("en_core_web_sm")
18
 
19
-
20
-
21
  # Initialize the English text classification pipeline for AI detection
22
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
23
 
@@ -31,9 +29,6 @@ inflect_engine = inflect.engine()
31
  nltk.download('wordnet')
32
  nltk.download('omw-1.4')
33
 
34
- # Load the SpaCy model
35
- nlp = spacy.load("en_core_web_sm")
36
-
37
  # Function to predict the label and score for English text (AI Detection)
38
  def predict_en(text):
39
  res = pipeline_en(text)[0]
@@ -172,22 +167,67 @@ def ensure_subject_verb_agreement(text):
172
  corrected_text.append(token.text)
173
  return ' '.join(corrected_text)
174
 
175
- # Function to correct spelling errors
176
- def correct_spelling(text):
177
  words = text.split()
178
  corrected_words = []
179
  for word in words:
180
- corrected_word = spell.correction(word)
181
- corrected_words.append(corrected_word if corrected_word else word)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  return ' '.join(corrected_words)
183
 
184
- # Function to correct punctuation issues
185
- def correct_punctuation(text):
 
186
  text = re.sub(r'\s+([?.!,";:])', r'\1', text)
187
- text = re.sub(r'([?.!,";:])\s+', r'\1 ', text)
 
 
 
 
 
 
 
 
 
188
  return text
189
 
190
- # Function to ensure correct handling of possessive forms
191
  def handle_possessives(text):
192
  text = re.sub(r"\b(\w+)'s\b", r"\1's", text)
193
  return text
@@ -231,53 +271,40 @@ def rephrase_with_synonyms(text):
231
 
232
  return ' '.join(rephrased_text)
233
 
234
- # Function to paraphrase and correct grammar with enhanced accuracy
235
- def paraphrase_and_correct(text):
236
- # Remove meaningless or redundant words first
237
- cleaned_text = remove_redundant_words(text)
238
-
239
- # Capitalize sentences and proper nouns
240
- cleaned_text = capitalize_sentences_and_nouns(cleaned_text)
241
-
242
- # Correct tense errors
243
- cleaned_text = correct_tense_errors(cleaned_text)
244
-
245
- # Correct singular/plural errors
246
- cleaned_text = correct_singular_plural_errors(cleaned_text)
247
-
248
- # Correct article errors
249
- cleaned_text = correct_article_errors(cleaned_text)
250
-
251
- # Correct spelling
252
- cleaned_text = correct_spelling(cleaned_text)
253
-
254
- # Correct punctuation issues
255
- cleaned_text = correct_punctuation(cleaned_text)
256
-
257
- # Handle possessives
258
- cleaned_text = handle_possessives(cleaned_text)
259
-
260
- # Replace words with synonyms
261
- cleaned_text = rephrase_with_synonyms(cleaned_text)
262
-
263
- # Correct double negatives
264
- cleaned_text = correct_double_negatives(cleaned_text)
265
-
266
- # Ensure subject-verb agreement
267
- cleaned_text = ensure_subject_verb_agreement(cleaned_text)
268
-
269
- return cleaned_text
270
-
271
  # Function to detect AI-generated content
272
  def detect_ai(text):
273
  label, score = predict_en(text)
274
  return label, score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  def gradio_interface(text):
276
  label, score = detect_ai(text)
277
  corrected_text = paraphrase_and_correct(text)
278
  return {label: score}, corrected_text
279
 
280
- # Modify the Gradio interface setup
281
  iface = gr.Interface(
282
  fn=gradio_interface,
283
  inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
@@ -290,4 +317,4 @@ iface = gr.Interface(
290
  )
291
 
292
  # Launch the app
293
- iface.launch()
 
2
  import gradio as gr
3
  from transformers import pipeline
4
  import spacy
 
5
  import nltk
6
  from nltk.corpus import wordnet
7
  from spellchecker import SpellChecker
8
  import re
9
  import inflect
10
 
11
+ # Initialize components
12
  try:
13
  nlp = spacy.load("en_core_web_sm")
14
  except OSError:
 
16
  spacy.cli.download("en_core_web_sm")
17
  nlp = spacy.load("en_core_web_sm")
18
 
 
 
19
  # Initialize the English text classification pipeline for AI detection
20
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
21
 
 
29
  nltk.download('wordnet')
30
  nltk.download('omw-1.4')
31
 
 
 
 
32
  # Function to predict the label and score for English text (AI Detection)
33
  def predict_en(text):
34
  res = pipeline_en(text)[0]
 
167
  corrected_text.append(token.text)
168
  return ' '.join(corrected_text)
169
 
170
# Enhance the spell checker function
def enhanced_spell_check(text):
    """Spell-check ``text`` token by token, including underscore-joined tokens.

    The text is split on whitespace. A token containing ``_`` (for example
    ``animate_being``) is split on the underscores, each part is corrected
    separately, and the parts are re-joined with ``_``. Any other token is
    corrected as a whole.

    Args:
        text: The input string.

    Returns:
        The corrected tokens joined by single spaces.
    """
    def _correct(fragment):
        # Leading, trailing or doubled underscores produce empty fragments;
        # keep them as-is rather than asking the spell checker about "".
        if not fragment:
            return fragment
        suggestion = spell.correction(fragment)
        # The checker may return a falsy value when it has no suggestion;
        # fall back to the original fragment so the join below never sees
        # a non-string (the sub-word branch used to crash on that).
        return suggestion if suggestion else fragment

    corrected_words = []
    for word in text.split():
        if '_' in word:  # Handle cases like 'animate_being'
            corrected_words.append(
                '_'.join(_correct(part) for part in word.split('_'))
            )
        else:
            corrected_words.append(_correct(word))
    return ' '.join(corrected_words)
183
+
184
# Function to correct common semantic errors
def correct_semantic_errors(text):
    """Swap known awkward or misspelled tokens for their preferred wording.

    The text is split on whitespace and each token is looked up in a fixed
    table using its lower-cased form. Tokens found in the table are replaced
    by the table value exactly as written there; all other tokens pass
    through unchanged. A token with punctuation attached (``"dirt,"``) is a
    different key and therefore is not replaced.

    Args:
        text: The input string.

    Returns:
        The resulting tokens joined by single spaces.
    """
    replacements = {
        # Odd synonym choices mapped back to plainer wording.
        "animate_being": "animal", "little": "smallest", "big": "largest",
        "mammalian": "mammals", "universe": "world", "manner": "ways",
        "continue": "preserve", "dirt": "soil", "wellness": "health",
        "modulate": "regulate", "clime": "climate", "function": "role",
        "keeping": "maintaining", "lend": "contribute", "better": "improve",
        # NOTE(review): this rewrites every standalone "is" to "s" --
        # confirm that is really intended.
        "is": "s",
        # Common typos.
        "wite": "write", "alos": "also", "ads": "as",
        "dictuionatr": "dictionary", "wors": "words",
    }

    output = []
    for token in text.split():
        key = token.lower()
        output.append(replacements[key] if key in replacements else token)
    return ' '.join(output)
213
 
214
def _respace_quotes(text):
    """Put one space outside each pair of double quotes and none inside."""
    segments = text.split('"')
    if len(segments) % 2 == 0:
        # Odd number of quote characters: the last one has no partner
        # (e.g. an inch mark), so keep it literally where it was.
        segments[-2:] = [segments[-2] + '"' + segments[-1]]
    if len(segments) == 1:
        return segments[0]

    pieces = []
    for index, segment in enumerate(segments):
        if index % 2:
            # Odd positions are the text between an opening and closing quote.
            pieces.append('"' + segment.strip() + '"')
        else:
            pieces.append(segment.strip())
    return ' '.join(piece for piece in pieces if piece)


# Enhance the punctuation correction function
def enhance_punctuation(text):
    """Normalize spacing around punctuation and capitalize sentence starts.

    Steps, in order:
      1. Paired double quotes get a space outside and none inside
         (``said"hi"now`` -> ``said "hi" now``).
      2. Whitespace before ``? . ! , ; :`` is removed.
      3. A missing space is added after those marks when a letter follows,
         or when a digit follows and the mark is not between two digits.
         Numbers such as ``3.14``, ``1,000`` and ``10:30`` stay intact, and
         runs such as ``...`` or ``?!`` are not broken up.
      4. A lowercase ASCII letter after ``. ! ?`` is capitalized.

    Abbreviations and domain names (``e.g.``, ``example.com``) are still
    treated as sentence punctuation.

    Args:
        text: The input string.

    Returns:
        The normalized string with leading/trailing whitespace stripped.
    """
    # Correct spacing for quotes
    text = _respace_quotes(text)

    # Remove extra spaces before punctuation
    text = re.sub(r'\s+([?.!,;:])', r'\1', text)

    # Add space after punctuation if it's missing. The lookbehind inside the
    # lookahead rejects "digit, mark, digit" so numbers are not split.
    text = re.sub(r'([?.!,;:])(?=[^\W\d_]|(?<!\d.)\d)', r'\1 ', text)

    # Ensure proper capitalization after sentence-ending punctuation
    text = re.sub(
        r'([.!?])\s*([a-z])',
        lambda m: m.group(1) + ' ' + m.group(2).upper(),
        text,
    )

    return text.strip()
229
 
230
# Function to handle possessives
def handle_possessives(text):
    """Re-attach a detached ``'s`` to the word before it.

    Other steps in this file rebuild text with ``' '.join(...)`` over
    tokenizer output, which leaves the clitic separated (``dog 's bone``).
    This closes that gap. The previous pattern replaced ``word's`` with
    itself and so never changed anything.

    Args:
        text: The input string.

    Returns:
        The text with ``word 's`` rewritten to ``word's``. Text that is
        already attached is left untouched.
    """
    # The trailing (?!') keeps a quoted single letter such as 's' from being
    # glued onto the preceding word.
    return re.sub(r"\b(\w+)\s+'s\b(?!')", r"\1's", text)
 
271
 
272
  return ' '.join(rephrased_text)
273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
# Function to detect AI-generated content
def detect_ai(text):
    """Classify ``text`` with the AI-detection helper.

    Delegates to ``predict_en`` and passes its two results through.

    Args:
        text: The input string to classify.

    Returns:
        A ``(label, score)`` tuple as produced by ``predict_en``.
    """
    verdict, confidence = predict_en(text)
    return verdict, confidence
278
+
279
# Enhance the paraphrase_and_correct function
def paraphrase_and_correct(text):
    """Run ``text`` through the full correction pipeline.

    Each stage receives the output of the previous one. Order matters:
    spelling and fixed-table replacements come first, then the grammar and
    punctuation passes, then synonym rephrasing and the final agreement
    checks.

    Args:
        text: The input string.

    Returns:
        The text after every stage has been applied.
    """
    stages = (
        enhanced_spell_check,            # spelling, incl. underscore-joined words
        correct_semantic_errors,         # fixed-table replacements
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        enhance_punctuation,
        handle_possessives,
        rephrase_with_synonyms,
        correct_double_negatives,
        ensure_subject_verb_agreement,
    )

    for stage in stages:
        text = stage(text)

    return text
300
+
301
# Gradio interface setup
def gradio_interface(text):
    """Callback wired into the Gradio UI.

    Runs AI detection on the raw input first, then the correction pipeline
    on the same raw input.

    Args:
        text: The text entered by the user.

    Returns:
        A 2-tuple: a single-entry dict mapping the detected label to its
        score, and the corrected text.
    """
    ai_label, ai_score = detect_ai(text)
    rewritten = paraphrase_and_correct(text)
    detection = {ai_label: ai_score}
    return detection, rewritten
306
 
307
+ # Create Gradio interface
308
  iface = gr.Interface(
309
  fn=gradio_interface,
310
  inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
 
317
  )
318
 
319
  # Launch the app
320
+ iface.launch()