sashtech committed on
Commit
124e989
·
verified ·
1 Parent(s): c67ee2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -101
app.py CHANGED
@@ -6,6 +6,7 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -29,7 +30,7 @@ def predict_en(text):
29
  res = pipeline_en(text)[0]
30
  return res['label'], res['score']
31
 
32
- # Function to get synonyms using NLTK WordNet
33
  def get_synonyms_nltk(word, pos):
34
  synsets = wordnet.synsets(word, pos=pos)
35
  if synsets:
@@ -37,23 +38,73 @@ def get_synonyms_nltk(word, pos):
37
  return [lemma.name() for lemma in lemmas]
38
  return []
39
 
40
- # Function to dynamically select the most relevant synonym
41
- def get_relevant_synonym(word, pos, context):
42
- synonyms = get_synonyms_nltk(word, pos)
43
- if not synonyms:
44
- return word
45
 
46
- # Basic relevance check: choose the synonym that appears most frequently in similar contexts
47
- relevant_synonym = word
48
- max_count = 0
 
 
49
 
50
- for synonym in synonyms:
51
- count = context.lower().count(synonym.lower())
52
- if count > max_count:
53
- max_count = count
54
- relevant_synonym = synonym
 
 
 
 
 
 
 
 
 
 
55
 
56
- return relevant_synonym
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  # Function to remove redundant and meaningless words
59
  def remove_redundant_words(text):
@@ -151,34 +202,6 @@ def correct_article_errors(text):
151
  corrected_text.append(token.text)
152
  return ' '.join(corrected_text)
153
 
154
- # Function to get the correct synonym while maintaining verb form
155
- def replace_with_synonym(token, context):
156
- pos = None
157
- if token.pos_ == "VERB":
158
- pos = wordnet.VERB
159
- elif token.pos_ == "NOUN":
160
- pos = wordnet.NOUN
161
- elif token.pos_ == "ADJ":
162
- pos = wordnet.ADJ
163
- elif token.pos_ == "ADV":
164
- pos = wordnet.ADV
165
-
166
- synonyms = get_synonyms_nltk(token.lemma_, pos)
167
-
168
- if synonyms:
169
- synonym = get_relevant_synonym(token.text, pos, context)
170
- if token.pos_ == "VERB":
171
- if token.tag_ == "VBG": # Present participle (e.g., running)
172
- synonym = synonym + 'ing'
173
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
174
- synonym = synonym + 'ed'
175
- elif token.tag_ == "VBZ": # Third-person singular present
176
- synonym = synonym + 's'
177
- elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
178
- synonym += 's' if not synonym.endswith('s') else ""
179
- return synonym
180
- return token.text
181
-
182
  # Function to check for and avoid double negatives
183
  def correct_double_negatives(text):
184
  doc = nlp(text)
@@ -203,65 +226,18 @@ def ensure_subject_verb_agreement(text):
203
  corrected_text.append(token.text)
204
  return ' '.join(corrected_text)
205
 
206
- # Function to correct spelling errors
207
  def correct_spelling(text):
208
  words = text.split()
209
  corrected_words = []
210
  for word in words:
211
  corrected_word = spell.correction(word)
 
 
 
212
  corrected_words.append(corrected_word)
213
  return ' '.join(corrected_words)
214
 
215
- # Function to rephrase text and replace words with their synonyms while maintaining form
216
- def rephrase_with_synonyms(text):
217
- doc = nlp(text)
218
- rephrased_text = []
219
-
220
- for token in doc:
221
- pos_tag = None
222
- if token.pos_ == "NOUN":
223
- pos_tag = wordnet.NOUN
224
- elif token.pos_ == "VERB":
225
- pos_tag = wordnet.VERB
226
- elif token.pos_ == "ADJ":
227
- pos_tag = wordnet.ADJ
228
- elif token.pos_ == "ADV":
229
- pos_tag = wordnet.ADV
230
-
231
- if pos_tag:
232
- synonyms = get_synonyms_nltk(token.text, pos_tag)
233
- if synonyms:
234
- synonym = get_relevant_synonym(token.text, pos_tag, text)
235
- if token.pos_ == "VERB":
236
- if token.tag_ == "VBG": # Present participle (e.g., running)
237
- synonym = synonym + 'ing'
238
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
239
- synonym = synonym + 'ed'
240
- elif token.tag_ == "VBZ": # Third-person singular present
241
- synonym = synonym + 's'
242
- elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
243
- synonym += 's' if not synonym.endswith('s') else ""
244
- rephrased_text.append(synonym)
245
- else:
246
- rephrased_text.append(token.text)
247
- else:
248
- rephrased_text.append(token.text)
249
-
250
- return ' '.join(rephrased_text)
251
-
252
- # Function to retain the structure of the input text (headings, paragraphs, line breaks)
253
- def retain_structure(text):
254
- lines = text.split("\n")
255
- formatted_lines = []
256
-
257
- for line in lines:
258
- if line.strip().isupper(): # Heading if all caps
259
- formatted_lines.append(f"# {line.strip()}") # Treat it as a heading
260
- else:
261
- formatted_lines.append(line) # Otherwise, it's a paragraph or normal text
262
-
263
- return "\n".join(formatted_lines)
264
-
265
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
266
  def paraphrase_and_correct(text):
267
  # Retain the structure (headings, paragraphs, line breaks)
@@ -286,16 +262,13 @@ def paraphrase_and_correct(text):
286
  paraphrased_text = correct_double_negatives(paraphrased_text)
287
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
288
 
289
- # Rephrase with synonyms while maintaining grammatical forms
290
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
291
 
292
  # Correct spelling errors
293
  paraphrased_text = correct_spelling(paraphrased_text)
294
 
295
- # Reapply the structure to the final output
296
- final_output = retain_structure(paraphrased_text)
297
-
298
- return final_output
299
 
300
  # Gradio app setup with two tabs
301
  with gr.Blocks() as demo:
@@ -309,8 +282,8 @@ with gr.Blocks() as demo:
309
  button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
310
 
311
  with gr.Tab("Paraphrasing & Grammar Correction"):
312
- t2 = gr.Textbox(lines=5, label='Enter text for Humanifying')
313
- button2 = gr.Button("🔄 Humanifier")
314
  result2 = gr.Textbox(lines=5, label='Corrected Text')
315
 
316
  # Connect the paraphrasing and correction function to the button
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
+ import random # Import random for versatile synonym replacement
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
30
  res = pipeline_en(text)[0]
31
  return res['label'], res['score']
32
 
33
+ # Enhanced function to get synonyms using NLTK WordNet
34
  def get_synonyms_nltk(word, pos):
35
  synsets = wordnet.synsets(word, pos=pos)
36
  if synsets:
 
38
  return [lemma.name() for lemma in lemmas]
39
  return []
40
 
41
+ # Retain the structure of the input text (headings, paragraphs, line breaks)
42
+ def retain_structure(text):
43
+ lines = text.split("\n")
44
+ formatted_lines = []
 
45
 
46
+ for line in lines:
47
+ if line.strip().isupper(): # Heading if all caps
48
+ formatted_lines.append(f"# {line.strip()}") # Treat it as a heading
49
+ else:
50
+ formatted_lines.append(line) # Otherwise, it's a paragraph or normal text
51
 
52
+ return "\n".join(formatted_lines)
53
+
54
+ # Dynamic and versatile synonym replacement
55
+ def replace_with_synonym(token):
56
+ pos = None
57
+ if token.pos_ == "VERB":
58
+ pos = wordnet.VERB
59
+ elif token.pos_ == "NOUN":
60
+ pos = wordnet.NOUN
61
+ elif token.pos_ == "ADJ":
62
+ pos = wordnet.ADJ
63
+ elif token.pos_ == "ADV":
64
+ pos = wordnet.ADV
65
+
66
+ synonyms = get_synonyms_nltk(token.lemma_, pos)
67
 
68
+ if synonyms:
69
+ # Randomly choose a synonym to add more versatility
70
+ synonym = random.choice(synonyms)
71
+ if token.tag_ == "VBG": # Present participle (e.g., running)
72
+ synonym = synonym + 'ing'
73
+ elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
74
+ synonym = synonym + 'ed'
75
+ elif token.tag_ == "VBZ": # Third-person singular present
76
+ synonym = synonym + 's'
77
+ return synonym
78
+ return token.text
79
+
80
+ # Function to rephrase text and replace words with versatile synonyms
81
+ def rephrase_with_synonyms(text):
82
+ doc = nlp(text)
83
+ rephrased_text = []
84
+
85
+ for token in doc:
86
+ pos_tag = None
87
+ if token.pos_ == "NOUN":
88
+ pos_tag = wordnet.NOUN
89
+ elif token.pos_ == "VERB":
90
+ pos_tag = wordnet.VERB
91
+ elif token.pos_ == "ADJ":
92
+ pos_tag = wordnet.ADJ
93
+ elif token.pos_ == "ADV":
94
+ pos_tag = wordnet.ADV
95
+
96
+ if pos_tag:
97
+ synonyms = get_synonyms_nltk(token.text, pos_tag)
98
+ if synonyms:
99
+ # Use the dynamic synonym replacement for versatility
100
+ synonym = replace_with_synonym(token)
101
+ rephrased_text.append(synonym)
102
+ else:
103
+ rephrased_text.append(token.text)
104
+ else:
105
+ rephrased_text.append(token.text)
106
+
107
+ return ' '.join(rephrased_text)
108
 
109
  # Function to remove redundant and meaningless words
110
  def remove_redundant_words(text):
 
202
  corrected_text.append(token.text)
203
  return ' '.join(corrected_text)
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  # Function to check for and avoid double negatives
206
  def correct_double_negatives(text):
207
  doc = nlp(text)
 
226
  corrected_text.append(token.text)
227
  return ' '.join(corrected_text)
228
 
229
+ # Function to correct spelling errors and handle None cases
230
  def correct_spelling(text):
231
  words = text.split()
232
  corrected_words = []
233
  for word in words:
234
  corrected_word = spell.correction(word)
235
+ # If spell.correction returns None, use the original word
236
+ if corrected_word is None:
237
+ corrected_word = word
238
  corrected_words.append(corrected_word)
239
  return ' '.join(corrected_words)
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
242
  def paraphrase_and_correct(text):
243
  # Retain the structure (headings, paragraphs, line breaks)
 
262
  paraphrased_text = correct_double_negatives(paraphrased_text)
263
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
264
 
265
+ # Rephrase with versatile synonyms while maintaining grammatical forms
266
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
267
 
268
  # Correct spelling errors
269
  paraphrased_text = correct_spelling(paraphrased_text)
270
 
271
+ return paraphrased_text
 
 
 
272
 
273
  # Gradio app setup with two tabs
274
  with gr.Blocks() as demo:
 
282
  button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
283
 
284
  with gr.Tab("Paraphrasing & Grammar Correction"):
285
+ t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
286
+ button2 = gr.Button("🔄 Paraphrase and Correct")
287
  result2 = gr.Textbox(lines=5, label='Corrected Text')
288
 
289
  # Connect the paraphrasing and correction function to the button