sashtech committed on
Commit
486bbd6
·
verified ·
1 Parent(s): a1d4c88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +270 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import gradio as gr
3
  from transformers import pipeline
4
  import spacy
@@ -18,6 +19,275 @@ spell = SpellChecker()
18
  nltk.download('wordnet')
19
  nltk.download('omw-1.4')
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Ensure the SpaCy model is installed
22
  try:
23
  nlp = spacy.load("en_core_web_sm")
 
1
  import os
2
+ import os
3
  import gradio as gr
4
  from transformers import pipeline
5
  import spacy
 
19
  nltk.download('wordnet')
20
  nltk.download('omw-1.4')
21
 
22
+ # Ensure the SpaCy model is installed
23
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # The model package is missing. Install it with the interpreter that is
    # running this app (a bare "python" may resolve to a different
    # environment, or not exist at all), and fail loudly if the download
    # does not succeed instead of crashing later on the second load.
    import sys

    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
28
+
29
+ # Function to predict the label and score for English text (AI Detection)
30
def predict_en(text):
    """Run the English AI-detection pipeline on ``text``.

    Returns a ``(label, score)`` tuple read from the first result that the
    module-level ``pipeline_en`` produces for ``text``.
    """
    top_result = pipeline_en(text)[0]
    label = top_result['label']
    score = top_result['score']
    return label, score
33
+
34
+ # Function to get synonyms using NLTK WordNet
35
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset for ``word``.

    ``pos`` is forwarded to ``wordnet.synsets`` as the part-of-speech
    filter. An empty list is returned when WordNet has no synset for the
    word.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    primary = matches[0]
    return [lemma.name() for lemma in primary.lemmas()]
41
+
42
+ # Function to remove redundant and meaningless words
43
def remove_redundant_words(text):
    """Drop a fixed set of filler words from ``text``.

    The text is tokenized with the module-level spaCy pipeline; tokens whose
    lower-cased form is one of the filler words are removed and the
    remaining token texts are joined with single spaces.
    """
    fillers = {"actually", "basically", "literally", "really", "very", "just"}
    kept = []
    for token in nlp(text):
        if token.text.lower() in fillers:
            continue
        kept.append(token.text)
    return ' '.join(kept)
48
+
49
+ # Function to capitalize the first letter of sentences and proper nouns
50
def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of each sentence and of proper nouns.

    The text is parsed with the module-level spaCy pipeline. For the first
    token of every sentence and for every token tagged ``PROPN`` only the
    first character is upper-cased; the rest of the token is left untouched
    so acronyms and mixed-case names ("NASA", "McDonald") are preserved.
    (``str.capitalize()`` would lower-case everything after the first
    character.)

    Returns the token texts joined with single spaces.
    """
    doc = nlp(text)
    sentences = []

    for sent in doc.sents:
        words = []
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Touch only the leading character; keep existing casing.
                word = word[:1].upper() + word[1:]
            words.append(word)
        sentences.append(' '.join(words))

    return ' '.join(sentences)
66
+
67
+ # Function to correct tense errors in a sentence
68
def correct_tense_errors(text):
    """Replace auxiliary verbs with their WordNet base form.

    A token is rewritten only when spaCy tags it ``VERB`` and its dependency
    label is ``aux`` or ``auxpass``; the replacement is the result of
    ``wordnet.morphy`` for that token, falling back to the original text
    when morphy returns nothing. All other tokens are kept as-is. Token
    texts are joined with single spaces.
    """
    auxiliary_deps = {"aux", "auxpass"}
    pieces = []
    for token in nlp(text):
        surface = token.text
        if token.pos_ == "VERB" and token.dep_ in auxiliary_deps:
            base_form = wordnet.morphy(surface, wordnet.VERB)
            pieces.append(base_form if base_form else surface)
        else:
            pieces.append(surface)
    return ' '.join(pieces)
78
+
79
+ # Function to correct singular/plural errors
80
def correct_singular_plural_errors(text):
    """Fix noun number when it conflicts with a quantifier on the noun.

    * A singular noun (``NN``) modified by "many", "several" or "few" is
      pluralised by appending ``s`` to its lemma.
    * A plural noun (``NNS``) modified by "a" or "one" (and by no plural
      quantifier, so "a few dogs" is left alone) is replaced by its lemma.

    The quantifiers are looked up among the noun's *own* syntactic children,
    because determiners and quantifiers attach to the noun they modify, not
    to the noun's head. Every token is always emitted, so nouns with any
    other tag are passed through unchanged rather than dropped.

    Returns the token texts joined with single spaces.
    """
    plural_cues = {'many', 'several', 'few'}
    singular_cues = {'a', 'one'}

    doc = nlp(text)
    corrected_text = []

    for token in doc:
        replacement = token.text
        if token.pos_ == "NOUN":
            modifiers = {child.text.lower() for child in token.children}
            if token.tag_ == "NN" and modifiers & plural_cues:
                replacement = token.lemma_ + 's'
            elif (
                token.tag_ == "NNS"
                and modifiers & singular_cues
                and not modifiers & plural_cues
            ):
                replacement = token.lemma_
        corrected_text.append(replacement)

    return ' '.join(corrected_text)
100
+
101
+ # Function to check and correct article errors
102
def correct_article_errors(text):
    """Swap "a"/"an" so the article matches the following word.

    A lower-case "a" followed by a word starting with a vowel letter becomes
    "an"; a lower-case "an" followed by a word starting with any other
    character becomes "a". The decision is based on the first letter only,
    not on pronunciation.

    An article that is the last token of the text has nothing to agree with
    and is kept unchanged (looking up its neighbour would raise
    ``IndexError``).

    Returns the token texts joined with single spaces.
    """
    doc = nlp(text)
    last_index = len(doc) - 1
    corrected_text = []
    for token in doc:
        word = token.text
        if word in ('a', 'an') and token.i < last_index:
            following = doc[token.i + 1].text
            starts_with_vowel = following[0].lower() in "aeiou"
            if word == "a" and starts_with_vowel:
                word = "an"
            elif word == "an" and not starts_with_vowel:
                word = "a"
        corrected_text.append(word)
    return ' '.join(corrected_text)
117
+
118
+ # Function to get the correct synonym while maintaining verb form
119
def replace_with_synonym(token):
    """Return a synonym for a spaCy ``token``, with a tag-based suffix.

    The token's coarse part of speech selects the WordNet category passed to
    ``get_synonyms_nltk`` (``None`` for anything other than verb, noun,
    adjective or adverb). The first synonym is used; "ing", "ed" or "s" is
    appended when the fine-grained tag is ``VBG``, ``VBD``/``VBN`` or
    ``VBZ``. If no synonym is found the token's own text is returned.
    """
    pos_lookup = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    suffix_lookup = {"VBG": 'ing', "VBD": 'ed', "VBN": 'ed', "VBZ": 's'}

    candidates = get_synonyms_nltk(token.lemma_, pos_lookup.get(token.pos_))
    if not candidates:
        return token.text
    return candidates[0] + suffix_lookup.get(token.tag_, '')
142
+
143
+ # Function to check for and avoid double negatives
144
def correct_double_negatives(text):
    """Replace "not" with "always" when its head also governs "never".

    For each token whose lower-cased text is "not", the children of that
    token's syntactic head are scanned; if any of them is "never" the token
    is emitted as "always". Every other token is emitted unchanged. Token
    texts are joined with single spaces.
    """
    def _head_has_never(token):
        for sibling in token.head.children:
            if sibling.text.lower() == "never":
                return True
        return False

    pieces = []
    for token in nlp(text):
        is_double_negative = token.text.lower() == "not" and _head_has_never(token)
        pieces.append("always" if is_double_negative else token.text)
    return ' '.join(pieces)
153
+
154
+ # Function to ensure subject-verb agreement
155
def ensure_subject_verb_agreement(text):
    """Make present-tense verbs agree in number with their noun subject.

    For every ``nsubj`` token whose head is a ``VERB``:

    * singular subject (``NN``) with a non-third-person present verb
      (``VBP``): the verb becomes ``lemma + "s"``;
    * plural subject (``NNS``) with a third-person singular verb (``VBZ``):
      the verb becomes its lemma.

    The correction is applied *in place* at the verb's position. Only
    present-tense forms are touched, so past-tense verbs, participles and
    bare infinitives after a modal are left alone.

    Returns the token texts joined with single spaces.
    """
    doc = nlp(text)

    # Map verb token index -> corrected surface form.
    replacements = {}
    for token in doc:
        if token.dep_ != "nsubj" or token.head.pos_ != "VERB":
            continue
        verb = token.head
        if token.tag_ == "NN" and verb.tag_ == "VBP":
            replacements[verb.i] = verb.lemma_ + "s"
        elif token.tag_ == "NNS" and verb.tag_ == "VBZ":
            replacements[verb.i] = verb.lemma_

    return ' '.join(replacements.get(token.i, token.text) for token in doc)
166
+
167
+ # Function to correct spelling errors
168
def correct_spelling(text):
    """Spell-correct ``text`` one whitespace-separated word at a time.

    Each word is passed to the module-level ``spell.correction``; when that
    returns nothing the original word is kept. The words are joined with
    single spaces.
    """
    fixed = [spell.correction(word) or word for word in text.split()]
    return ' '.join(fixed)
175
+
176
+ # Function to correct punctuation issues
177
def correct_punctuation(text):
    """Normalise whitespace around punctuation marks.

    Two passes are applied in order over the characters ``? . ! , " ; :``:
    first any whitespace directly before a mark is removed, then any run of
    whitespace directly after a mark is collapsed to a single space.
    """
    rules = (
        (r'\s+([?.!,";:])', r'\1'),   # strip whitespace before the mark
        (r'([?.!,";:])\s+', r'\1 '),  # exactly one space after the mark
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text
181
+
182
+ # Function to ensure correct handling of possessive forms
183
def handle_possessives(text):
    """Re-attach a detached possessive/clitic ``'s`` to the preceding word.

    Joining spaCy tokens with spaces turns "dog's" into "dog 's". This
    removes the whitespace between a word and a following ``'s`` so the
    form reads "dog's" again. Text that already has the ``'s`` attached is
    returned unchanged.
    """
    return re.sub(r"\b(\w+)\s+'s\b", r"\1's", text)
186
+
187
+ # Function to rephrase text and replace words with their synonyms while maintaining form
188
def rephrase_with_synonyms(text):
    """Rewrite ``text`` by swapping content words for their first synonym.

    Per token:

    * a noun spelled "earth" (any casing) is emitted as "Earth";
    * nouns, verbs, adjectives and adverbs are looked up by lemma through
      ``get_synonyms_nltk``; when a synonym exists the first one is used,
      and for verbs "ing", "ed" or "s" is appended according to the
      ``VBG``, ``VBD``/``VBN`` or ``VBZ`` tag;
    * everything else, and words without synonyms, keep their text.

    Returns the resulting words joined with single spaces.
    """
    wordnet_pos = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    verb_suffix = {"VBG": 'ing', "VBD": 'ed', "VBN": 'ed', "VBZ": 's'}

    output = []
    for token in nlp(text):
        if token.pos_ == "NOUN" and token.text.lower() == "earth":
            output.append("Earth")
            continue

        pos_tag = wordnet_pos.get(token.pos_)
        candidates = get_synonyms_nltk(token.lemma_, pos_tag) if pos_tag else []
        if not candidates:
            output.append(token.text)
            continue

        replacement = candidates[0]  # first synonym only, for simplicity
        if token.pos_ == "VERB":
            replacement = replacement + verb_suffix.get(token.tag_, '')
        output.append(replacement)

    return ' '.join(output)
225
+
226
+ # Function to paraphrase and correct grammar with enhanced accuracy
227
def paraphrase_and_correct(text):
    """Run the full clean-up and paraphrasing pipeline over ``text``.

    The stages run in a fixed order, each receiving the previous stage's
    output: filler-word removal, capitalization, tense, singular/plural,
    article, double-negative and subject-verb fixes, spelling, punctuation,
    possessives, and synonym rephrasing. The result is finally passed
    through ``force_first_letter_capital``, which is not defined in the
    visible part of this file and is expected to exist elsewhere.
    """
    stages = (
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        correct_double_negatives,
        ensure_subject_verb_agreement,
        correct_spelling,
        correct_punctuation,
        handle_possessives,
        rephrase_with_synonyms,
    )

    result = text
    for stage in stages:
        result = stage(result)

    # Force capitalization of the first letter of each sentence.
    return force_first_letter_capital(result)
253
+
254
+ # Gradio Interface
255
def process_text(input_text):
    """Gradio callback: classify ``input_text`` and return a corrected copy.

    Returns ``(label, score, corrected_text)``, where the first two values
    come from ``predict_en`` and the third from ``paraphrase_and_correct``,
    both applied to the unmodified input.
    """
    label, score = predict_en(input_text)
    rewritten = paraphrase_and_correct(input_text)
    return label, score, rewritten
259
+
260
+ # Create Gradio interface
261
+ iface = gr.Interface(
262
+ fn=process_text,
263
+ inputs="text",
264
+ outputs=["text", "number", "text"],
265
+ title="AI Content Detection and Grammar Correction",
266
+ description="Enter text to detect AI-generated content and correct grammar."
267
+ )
268
+
269
+ # Launch the Gradio app
270
+ if __name__ == "__main__":
271
+ iface.launch()
272
+
273
+ from transformers import pipeline
274
+ import spacy
275
+ import subprocess
276
+ import nltk
277
+ from nltk.corpus import wordnet
278
+ from spellchecker import SpellChecker
279
+ import re
280
+
281
+ # Initialize the English text classification pipeline for AI detection
282
+ pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
283
+
284
+ # Initialize the spell checker
285
+ spell = SpellChecker()
286
+
287
+ # Ensure necessary NLTK data is downloaded
288
+ nltk.download('wordnet')
289
+ nltk.download('omw-1.4')
290
+
291
  # Ensure the SpaCy model is installed
292
  try:
293
  nlp = spacy.load("en_core_web_sm")