sashtech committed on
Commit
cf3f184
·
verified ·
1 Parent(s): f18ad55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +224 -41
app.py CHANGED
@@ -7,14 +7,16 @@ import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
  import re
10
- from inflect import engine # For pluralization
11
 
12
  # Initialize the English text classification pipeline for AI detection
13
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
14
 
15
- # Initialize the spell checker and inflect engine
16
  spell = SpellChecker()
17
- inflect_engine = engine()
 
 
18
 
19
  # Ensure necessary NLTK data is downloaded
20
  nltk.download('wordnet')
@@ -27,7 +29,7 @@ except OSError:
27
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
28
  nlp = spacy.load("en_core_web_sm")
29
 
30
- # Function to predict AI detection
31
  def predict_en(text):
32
  res = pipeline_en(text)[0]
33
  return res['label'], res['score']
@@ -37,66 +39,247 @@ def get_synonyms_nltk(word, pos):
37
  synsets = wordnet.synsets(word, pos=pos)
38
  if synsets:
39
  lemmas = synsets[0].lemmas()
40
- return [lemma.name() for lemma in lemmas if lemma.name() != word] # Avoid original word
41
  return []
42
 
43
- # Function to remove redundant words
44
  def remove_redundant_words(text):
 
45
  meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
46
- return ' '.join(word for word in text.split() if word.lower() not in meaningless_words)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # Function to correct spelling errors
49
  def correct_spelling(text):
50
  words = text.split()
51
- corrected_words = [spell.correction(word) for word in words]
 
 
 
52
  return ' '.join(corrected_words)
53
 
54
- # Function to rephrase text with synonyms
 
 
 
 
 
 
 
 
 
 
 
55
  def rephrase_with_synonyms(text):
56
  doc = nlp(text)
57
  rephrased_text = []
58
 
59
  for token in doc:
60
- pos_tag = {
61
- "NOUN": wordnet.NOUN,
62
- "VERB": wordnet.VERB,
63
- "ADJ": wordnet.ADJ,
64
- "ADV": wordnet.ADV
65
- }.get(token.pos_, None)
 
 
 
 
 
 
 
66
 
67
  if pos_tag:
68
  synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
69
- synonym = synonyms[0] if synonyms else token.text
70
- rephrased_text.append(synonym)
 
 
 
 
 
 
 
 
 
 
71
  else:
72
  rephrased_text.append(token.text)
73
 
74
  return ' '.join(rephrased_text)
75
 
76
- # Function to paraphrase and correct grammar
77
  def paraphrase_and_correct(text):
 
78
  cleaned_text = remove_redundant_words(text)
79
- cleaned_text = correct_spelling(cleaned_text)
80
- return rephrase_with_synonyms(cleaned_text)
81
-
82
- # Function to handle user input
83
- def process_text(input_text):
84
- ai_label, ai_score = predict_en(input_text)
85
-
86
- if ai_label == "HUMAN":
87
- corrected_text = paraphrase_and_correct(input_text)
88
- return corrected_text
89
- else:
90
- return "The text seems to be AI-generated; no correction applied."
91
-
92
- # Gradio interface
93
- iface = gr.Interface(
94
- fn=process_text,
95
- inputs=gr.Textbox(lines=10, placeholder="Enter your text here..."),
96
- outputs=gr.Textbox(label="Corrected Text"),
97
- title="Text Correction and Rephrasing",
98
- description="This app corrects and rephrases text while detecting AI-generated content."
99
- )
100
-
101
- # Launch the interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  iface.launch()
 
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
  import re
10
+ import inflect
11
 
12
  # Initialize the English text classification pipeline for AI detection
13
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
14
 
15
+ # Initialize the spell checker
16
  spell = SpellChecker()
17
+
18
+ # Initialize the inflect engine for pluralization
19
+ inflect_engine = inflect.engine()
20
 
21
  # Ensure necessary NLTK data is downloaded
22
  nltk.download('wordnet')
 
29
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
30
  nlp = spacy.load("en_core_web_sm")
31
 
32
# Run the English AI-content classifier on a piece of text.
def predict_en(text):
    """Classify *text* with the AI-detection pipeline.

    Returns the top prediction as a ``(label, score)`` tuple.
    """
    top = pipeline_en(text)[0]
    return top['label'], top['score']
 
39
  synsets = wordnet.synsets(word, pos=pos)
40
  if synsets:
41
  lemmas = synsets[0].lemmas()
42
+ return [lemma.name() for lemma in lemmas if lemma.name() != word]
43
  return []
44
 
45
# Strip filler adverbs that add no meaning to the sentence.
def remove_redundant_words(text):
    """Drop common filler words ("really", "just", ...) from *text*.

    Tokenizes with spaCy and rejoins surviving tokens with single spaces.
    """
    filler = {"actually", "basically", "literally", "really", "very", "just"}
    kept = []
    for token in nlp(text):
        if token.text.lower() not in filler:
            kept.append(token.text)
    return ' '.join(kept)
51
+
52
# Capitalize sentence-initial words and proper nouns.
def capitalize_sentences_and_nouns(text):
    """Capitalize the first word of every sentence and every proper noun.

    Uses spaCy sentence segmentation and POS tags.  Bug fix: the
    original used ``str.capitalize()``, which lower-cases the rest of
    the word and mangles acronyms ("NASA" -> "Nasa"); only the first
    letter is upper-cased now, leaving internal casing intact.
    """
    def _upper_first(word):
        # word[:1] is safe on the empty string, unlike word[0].
        return word[:1].upper() + word[1:]

    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                sentence.append(_upper_first(token.text))
            else:
                sentence.append(token.text)
        corrected_text.append(' '.join(sentence))
    return ' '.join(corrected_text)
69
+
70
# Reduce auxiliary verbs to their base form via WordNet's morphy.
def correct_tense_errors(text):
    """Lemmatize auxiliary verbs in *text*; all other tokens pass through.

    NOTE(review): spaCy normally tags auxiliaries with pos_ == "AUX",
    not "VERB", so this branch may rarely fire — confirm on real input.
    """
    doc = nlp(text)
    out = []
    for token in doc:
        is_aux_verb = token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}
        if is_aux_verb:
            base = wordnet.morphy(token.text, wordnet.VERB) or token.text
            out.append(base)
        else:
            out.append(token.text)
    return ' '.join(out)
81
+
82
# Toggle noun number when nearby quantifiers/articles disagree with it.
def correct_singular_plural_errors(text):
    """Pluralize singular nouns governed by "many/several/few" and
    singularize plural nouns governed by "a/one".

    NOTE(review): the cue lookup scans ``token.head.children`` (the
    noun's siblings in the dependency tree) rather than the noun's own
    children — verify this is the intended direction.
    """
    plural_cues = {'many', 'several', 'few'}
    singular_cues = {'a', 'one'}
    doc = nlp(text)
    result = []
    for token in doc:
        if token.pos_ != "NOUN":
            result.append(token.text)
            continue
        siblings = [child.text.lower() for child in token.head.children]
        if token.tag_ == "NN":  # singular noun
            if any(cue in plural_cues for cue in siblings):
                result.append(inflect_engine.plural(token.lemma_))
            else:
                result.append(token.text)
        elif token.tag_ == "NNS":  # plural noun
            if any(cue in singular_cues for cue in siblings):
                # singular_noun() returns False when it cannot singularize.
                result.append(inflect_engine.singular_noun(token.text) or token.text)
            else:
                result.append(token.text)
        else:
            result.append(token.text)
    return ' '.join(result)
103
+
104
# Fix "a"/"an" agreement with the first letter of the following word.
def correct_article_errors(text):
    """Swap "a" <-> "an" so the article matches the next word.

    Bug fix: the original called ``token.nbor(1)`` unconditionally,
    which raises IndexError when the article is the last token of the
    document; the neighbor is now only read when one exists.
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.text in ('a', 'an') and token.i + 1 < len(doc):
            next_word = doc[token.i + 1].text
            # Vowel-letter heuristic; ignores pronunciation ("an hour").
            starts_with_vowel = next_word[:1].lower() in "aeiou"
            if token.text == "a" and starts_with_vowel:
                corrected_text.append("an")
            elif token.text == "an" and not starts_with_vowel:
                corrected_text.append("a")
            else:
                corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
120
+
121
# Swap a token for its first WordNet synonym, re-applying the verb suffix.
def replace_with_synonym(token):
    """Return the first WordNet synonym of *token*, roughly re-inflected
    to match its verb tag; falls back to the original token text.
    """
    pos = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }.get(token.pos_)

    synonyms = get_synonyms_nltk(token.lemma_, pos)
    if not synonyms:
        return token.text

    synonym = synonyms[0]
    # Naive suffixing: irregular verbs come out wrong ("run" -> "runed").
    if token.tag_ == "VBG":  # present participle
        synonym += 'ing'
    elif token.tag_ in ("VBD", "VBN"):  # past tense / past participle
        synonym += 'ed'
    elif token.tag_ == "VBZ":  # third-person singular present
        synonym += 's'
    return synonym
145
+
146
# Replace "not" with "always" when a sibling "never" forms a double negative.
def correct_double_negatives(text):
    """Resolve the "not ... never" double negative by emitting "always"
    in place of "not"; all other tokens pass through unchanged.
    """
    doc = nlp(text)
    out = []
    for token in doc:
        never_nearby = any(
            child.text.lower() == "never" for child in token.head.children
        )
        if token.text.lower() == "not" and never_nearby:
            out.append("always")
        else:
            out.append(token.text)
    return ' '.join(out)
156
+
157
# Make present-tense verbs agree in number with their noun subjects.
def ensure_subject_verb_agreement(text):
    """Fix simple subject-verb number agreement.

    Bug fix: the original appended the corrected verb immediately
    before the subject and still emitted the uncorrected verb at its
    own position, duplicating and reordering words ("The dogs runs"
    -> "The run dogs runs").  The verb is now replaced in place via
    its token index.
    """
    doc = nlp(text)
    words = [token.text for token in doc]
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                # Singular subject -> third-person singular verb.
                words[token.head.i] = token.head.lemma_ + "s"
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                # Plural subject -> base-form verb.
                words[token.head.i] = token.head.lemma_
    return ' '.join(words)
169
 
170
# Spell-check each whitespace-separated word.
def correct_spelling(text):
    """Return *text* with every word replaced by its best spelling
    correction; words the checker cannot fix are kept unchanged.
    """
    # spell.correction() may return None for unknown words — fall back
    # to the original word in that case.
    return ' '.join(spell.correction(word) or word for word in text.split())
178
 
179
# Normalize spacing around common punctuation marks.
def correct_punctuation(text):
    """Remove stray whitespace before punctuation and collapse any run
    of whitespace after punctuation to a single space.
    """
    strip_before = re.compile(r'\s+([?.!,";:])')
    single_after = re.compile(r'([?.!,";:])\s+')
    text = strip_before.sub(r'\1', text)
    return single_after.sub(r'\1 ', text)
184
+
185
# Re-attach possessive "'s" that tokenization split from its noun.
def handle_possessives(text):
    """Normalize possessive forms in *text*.

    Bug fix: the original pattern ``re.sub(r"\b(\w+)'s\b", r"\1's", text)``
    replaced every match with itself — a literal no-op.  The spaCy-token
    joins used elsewhere in this file emit "dog 's"; collapse that back
    to "dog's".  Text already in "dog's" form is left unchanged.
    """
    return re.sub(r"(\w)\s+'s\b", r"\1's", text)
189
+
190
# Replace nouns/verbs/adjectives/adverbs with their first WordNet synonym.
def rephrase_with_synonyms(text):
    """Rewrite *text* token by token, substituting the first WordNet
    synonym where one exists and crudely re-inflecting verbs to match
    their original tag.

    The noun "earth" is special-cased to the proper noun "Earth".
    """
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    def _inflect(base, token):
        # Naive suffixing; irregular verbs come out wrong ("run" -> "runed").
        if token.tag_ == "VBG":  # present participle
            return base + 'ing'
        if token.tag_ in ("VBD", "VBN"):  # past tense / past participle
            return base + 'ed'
        if token.tag_ == "VBZ":  # third-person singular present
            return base + 's'
        return base

    rephrased = []
    for token in nlp(text):
        if token.pos_ == "NOUN" and token.text.lower() == "earth":
            rephrased.append("Earth")
            continue

        pos_tag = pos_map.get(token.pos_)
        if pos_tag is None:
            rephrased.append(token.text)
            continue

        synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
        if synonyms:
            choice = synonyms[0]  # first synonym, for simplicity
            if token.pos_ == "VERB":
                choice = _inflect(choice, token)
            rephrased.append(choice)
        else:
            rephrased.append(token.text)

    return ' '.join(rephrased)
228
 
229
# Full correction pipeline: cleanup, grammar fixes, then synonym rephrasing.
def paraphrase_and_correct(text):
    """Run *text* through the complete correction pipeline.

    Steps run in a fixed order: redundancy removal, capitalization,
    tense, singular/plural, articles, spelling, punctuation,
    possessives, synonym rephrasing, double negatives, and finally
    subject-verb agreement.
    """
    steps = (
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        correct_spelling,
        correct_punctuation,
        handle_possessives,
        rephrase_with_synonyms,
        correct_double_negatives,
        ensure_subject_verb_agreement,
    )
    result = text
    for step in steps:
        result = step(result)
    return result
265
+
266
# Thin wrapper exposing the AI-content classifier.
def detect_ai(text):
    """Return the classifier's ``(label, score)`` for *text*."""
    return predict_en(text)
270
+
271
# Gradio callback: run AI detection and the correction pipeline.
def gradio_interface(text):
    """Handle one Gradio request.

    Returns a ``({label: score}, corrected_text)`` pair.  Bug fix: the
    original returned the raw ``(label, score)`` tuple as the first
    output, but the Label component expects a string or a
    label-to-confidence dict and cannot render a tuple.
    """
    label, score = detect_ai(text)
    corrected_text = paraphrase_and_correct(text)
    return {label: score}, corrected_text
276
+
277
# Create the Gradio interface.
# Fix: the gr.outputs.* namespace was deprecated in Gradio 2.x and
# removed in 3.x — use the top-level component classes instead.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
    outputs=[gr.Label(num_top_classes=2), gr.Textbox()],
    title="AI Detection and Grammar Correction",
    description="Detect AI-generated content and correct grammar issues.",
)

# Launch the app
iface.launch()