sashtech committed on
Commit
96d6bc7
·
verified ·
1 Parent(s): 8a0139f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -94
app.py CHANGED
@@ -6,6 +6,7 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -21,7 +22,7 @@ nltk.download('omw-1.4')
21
  try:
22
  nlp = spacy.load("en_core_web_sm")
23
  except OSError:
24
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
25
  nlp = spacy.load("en_core_web_sm")
26
 
27
  # Function to predict the label and score for English text (AI Detection)
@@ -133,66 +134,17 @@ def correct_article_errors(text):
133
  corrected_text.append(token.text)
134
  return ' '.join(corrected_text)
135
 
136
- # Function to get the correct synonym while maintaining verb form
137
- def replace_with_synonym(token):
138
- pos = None
139
- if token.pos_ == "VERB":
140
- pos = wordnet.VERB
141
- elif token.pos_ == "NOUN":
142
- pos = wordnet.NOUN
143
- elif token.pos_ == "ADJ":
144
- pos = wordnet.ADJ
145
- elif token.pos_ == "ADV":
146
- pos = wordnet.ADV
147
-
148
  synonyms = get_synonyms_nltk(token.lemma_, pos)
149
-
150
  if synonyms:
151
- synonym = synonyms[0]
152
- if token.tag_ == "VBG": # Present participle (e.g., running)
153
- synonym = synonym + 'ing'
154
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
155
- synonym = synonym + 'ed'
156
- elif token.tag_ == "VBZ": # Third-person singular present
157
- synonym = synonym + 's'
158
- return synonym
159
  return token.text
160
 
161
- # Function to check for and avoid double negatives
162
- def correct_double_negatives(text):
163
- doc = nlp(text)
164
- corrected_text = []
165
- for token in doc:
166
- if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
167
- corrected_text.append("always")
168
- else:
169
- corrected_text.append(token.text)
170
- return ' '.join(corrected_text)
171
-
172
- # Function to ensure subject-verb agreement
173
- def ensure_subject_verb_agreement(text):
174
- doc = nlp(text)
175
- corrected_text = []
176
- for token in doc:
177
- if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
178
- if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
179
- corrected_text.append(token.head.lemma_ + "s")
180
- elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
181
- corrected_text.append(token.head.lemma_)
182
- corrected_text.append(token.text)
183
- return ' '.join(corrected_text)
184
-
185
- # Function to correct spelling errors
186
- def correct_spelling(text):
187
- words = text.split()
188
- corrected_words = []
189
- for word in words:
190
- corrected_word = spell.correction(word)
191
- corrected_words.append(corrected_word)
192
- return ' '.join(corrected_words)
193
-
194
- # Function to rephrase text and replace words with their synonyms while maintaining form
195
- def rephrase_with_synonyms(text):
196
  doc = nlp(text)
197
  rephrased_text = []
198
 
@@ -206,29 +158,25 @@ def rephrase_with_synonyms(text):
206
  pos_tag = wordnet.ADJ
207
  elif token.pos_ == "ADV":
208
  pos_tag = wordnet.ADV
209
-
210
  if pos_tag:
211
- synonyms = get_synonyms_nltk(token.text, pos_tag)
212
- if synonyms:
213
- synonym = synonyms[0] # Just using the first synonym for simplicity
214
- if token.pos_ == "VERB":
215
- if token.tag_ == "VBG": # Present participle (e.g., running)
216
- synonym = synonym + 'ing'
217
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
218
- synonym = synonym + 'ed'
219
- elif token.tag_ == "VBZ": # Third-person singular present
220
- synonym = synonym + 's'
221
- elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
222
- synonym += 's' if not synonym.endswith('s') else ""
223
- rephrased_text.append(synonym)
224
- else:
225
- rephrased_text.append(token.text)
226
  else:
227
  rephrased_text.append(token.text)
228
 
229
  return ' '.join(rephrased_text)
230
 
231
- # Retain the structure of the input text (headings, paragraphs, line breaks)
232
  def retain_structure(text):
233
  lines = text.split("\n")
234
  formatted_lines = []
@@ -242,35 +190,24 @@ def retain_structure(text):
242
  return "\n".join(formatted_lines)
243
 
244
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
245
- def paraphrase_and_correct(text):
246
- # Retain the structure (headings, paragraphs, line breaks)
247
  structured_text = retain_structure(text)
248
 
249
- # Remove meaningless or redundant words first
250
- cleaned_text = remove_redundant_words(structured_text)
251
-
252
- # Capitalize sentences and nouns
253
- paraphrased_text = capitalize_sentences_and_nouns(cleaned_text)
254
 
255
- # Ensure first letter of each sentence is capitalized
 
 
256
  paraphrased_text = force_first_letter_capital(paraphrased_text)
257
-
258
- # Handle possessives properly
259
  paraphrased_text = handle_possessives(paraphrased_text)
260
-
261
- # Apply grammatical corrections
262
  paraphrased_text = correct_article_errors(paraphrased_text)
263
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
264
  paraphrased_text = correct_tense_errors(paraphrased_text)
265
  paraphrased_text = correct_double_negatives(paraphrased_text)
266
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
267
-
268
- # Rephrase with synonyms while maintaining grammatical forms
269
- paraphrased_text = rephrase_with_synonyms(paraphrased_text)
270
-
271
- # Correct spelling errors
272
  paraphrased_text = correct_spelling(paraphrased_text)
273
-
274
  return paraphrased_text
275
 
276
  # Gradio app setup with two tabs
@@ -290,6 +227,6 @@ with gr.Blocks() as demo:
290
  result2 = gr.Textbox(lines=5, label='Corrected Text')
291
 
292
  # Connect the paraphrasing and correction function to the button
293
- button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
294
 
295
- demo.launch(share=True) # Share=True to create a public link
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
+ import random
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
22
  try:
23
  nlp = spacy.load("en_core_web_sm")
24
  except OSError:
25
+ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
26
  nlp = spacy.load("en_core_web_sm")
27
 
28
  # Function to predict the label and score for English text (AI Detection)
 
134
  corrected_text.append(token.text)
135
  return ' '.join(corrected_text)
136
 
137
+ # Function to dynamically choose synonyms with more options
138
+ def dynamic_synonyms(token, pos):
 
 
 
 
 
 
 
 
 
 
139
  synonyms = get_synonyms_nltk(token.lemma_, pos)
140
+ # Choose a random synonym to increase variety
141
  if synonyms:
142
+ random_synonym = random.choice(synonyms)
143
+ return random_synonym
 
 
 
 
 
 
144
  return token.text
145
 
146
+ # Function to rephrase text and replace words with more versatile synonyms
147
+ def versatile_rephrase(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  doc = nlp(text)
149
  rephrased_text = []
150
 
 
158
  pos_tag = wordnet.ADJ
159
  elif token.pos_ == "ADV":
160
  pos_tag = wordnet.ADV
161
+
162
  if pos_tag:
163
+ synonym = dynamic_synonyms(token, pos_tag)
164
+ if token.pos_ == "VERB":
165
+ if token.tag_ == "VBG": # Present participle (e.g., running)
166
+ synonym = synonym + 'ing'
167
+ elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
168
+ synonym = synonym + 'ed'
169
+ elif token.tag_ == "VBZ": # Third-person singular present
170
+ synonym = synonym + 's'
171
+ elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
172
+ synonym += 's' if not synonym.endswith('s') else ""
173
+ rephrased_text.append(synonym)
 
 
 
 
174
  else:
175
  rephrased_text.append(token.text)
176
 
177
  return ' '.join(rephrased_text)
178
 
179
+ # Function to retain the structure of the input text (headings, paragraphs, line breaks)
180
  def retain_structure(text):
181
  lines = text.split("\n")
182
  formatted_lines = []
 
190
  return "\n".join(formatted_lines)
191
 
192
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
193
+ def paraphrase_and_correct_with_structure(text):
 
194
  structured_text = retain_structure(text)
195
 
196
+ # Rephrase with more versatile synonyms while maintaining grammatical forms
197
+ paraphrased_text = versatile_rephrase(structured_text)
 
 
 
198
 
199
+ # Apply grammatical corrections on the rephrased text
200
+ paraphrased_text = remove_redundant_words(paraphrased_text)
201
+ paraphrased_text = capitalize_sentences_and_nouns(paraphrased_text)
202
  paraphrased_text = force_first_letter_capital(paraphrased_text)
 
 
203
  paraphrased_text = handle_possessives(paraphrased_text)
 
 
204
  paraphrased_text = correct_article_errors(paraphrased_text)
205
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
206
  paraphrased_text = correct_tense_errors(paraphrased_text)
207
  paraphrased_text = correct_double_negatives(paraphrased_text)
208
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
 
 
 
 
 
209
  paraphrased_text = correct_spelling(paraphrased_text)
210
+
211
  return paraphrased_text
212
 
213
  # Gradio app setup with two tabs
 
227
  result2 = gr.Textbox(lines=5, label='Corrected Text')
228
 
229
  # Connect the paraphrasing and correction function to the button
230
+ button2.click(fn=paraphrase_and_correct_with_structure, inputs=t2, outputs=result2)
231
 
232
+ demo.launch(share=True)