sashtech committed on
Commit
c67ee2a
·
verified ·
1 Parent(s): 7b6fe3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -70
app.py CHANGED
@@ -6,7 +6,6 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
- import random # Import random for versatile synonym replacement
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -30,7 +29,7 @@ def predict_en(text):
30
  res = pipeline_en(text)[0]
31
  return res['label'], res['score']
32
 
33
- # Enhanced function to get synonyms using NLTK WordNet
34
  def get_synonyms_nltk(word, pos):
35
  synsets = wordnet.synsets(word, pos=pos)
36
  if synsets:
@@ -38,73 +37,23 @@ def get_synonyms_nltk(word, pos):
38
  return [lemma.name() for lemma in lemmas]
39
  return []
40
 
41
- # Retain the structure of the input text (headings, paragraphs, line breaks)
42
- def retain_structure(text):
43
- lines = text.split("\n")
44
- formatted_lines = []
 
45
 
46
- for line in lines:
47
- if line.strip().isupper(): # Heading if all caps
48
- formatted_lines.append(f"# {line.strip()}") # Treat it as a heading
49
- else:
50
- formatted_lines.append(line) # Otherwise, it's a paragraph or normal text
51
 
52
- return "\n".join(formatted_lines)
53
-
54
- # Dynamic and versatile synonym replacement
55
- def replace_with_synonym(token):
56
- pos = None
57
- if token.pos_ == "VERB":
58
- pos = wordnet.VERB
59
- elif token.pos_ == "NOUN":
60
- pos = wordnet.NOUN
61
- elif token.pos_ == "ADJ":
62
- pos = wordnet.ADJ
63
- elif token.pos_ == "ADV":
64
- pos = wordnet.ADV
65
-
66
- synonyms = get_synonyms_nltk(token.lemma_, pos)
67
 
68
- if synonyms:
69
- # Randomly choose a synonym to add more versatility
70
- synonym = random.choice(synonyms)
71
- if token.tag_ == "VBG": # Present participle (e.g., running)
72
- synonym = synonym + 'ing'
73
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
74
- synonym = synonym + 'ed'
75
- elif token.tag_ == "VBZ": # Third-person singular present
76
- synonym = synonym + 's'
77
- return synonym
78
- return token.text
79
-
80
- # Function to rephrase text and replace words with versatile synonyms
81
- def rephrase_with_synonyms(text):
82
- doc = nlp(text)
83
- rephrased_text = []
84
-
85
- for token in doc:
86
- pos_tag = None
87
- if token.pos_ == "NOUN":
88
- pos_tag = wordnet.NOUN
89
- elif token.pos_ == "VERB":
90
- pos_tag = wordnet.VERB
91
- elif token.pos_ == "ADJ":
92
- pos_tag = wordnet.ADJ
93
- elif token.pos_ == "ADV":
94
- pos_tag = wordnet.ADV
95
-
96
- if pos_tag:
97
- synonyms = get_synonyms_nltk(token.text, pos_tag)
98
- if synonyms:
99
- # Use the dynamic synonym replacement for versatility
100
- synonym = replace_with_synonym(token)
101
- rephrased_text.append(synonym)
102
- else:
103
- rephrased_text.append(token.text)
104
- else:
105
- rephrased_text.append(token.text)
106
-
107
- return ' '.join(rephrased_text)
108
 
109
  # Function to remove redundant and meaningless words
110
  def remove_redundant_words(text):
@@ -202,6 +151,34 @@ def correct_article_errors(text):
202
  corrected_text.append(token.text)
203
  return ' '.join(corrected_text)
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  # Function to check for and avoid double negatives
206
  def correct_double_negatives(text):
207
  doc = nlp(text)
@@ -235,6 +212,56 @@ def correct_spelling(text):
235
  corrected_words.append(corrected_word)
236
  return ' '.join(corrected_words)
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
239
  def paraphrase_and_correct(text):
240
  # Retain the structure (headings, paragraphs, line breaks)
@@ -259,13 +286,16 @@ def paraphrase_and_correct(text):
259
  paraphrased_text = correct_double_negatives(paraphrased_text)
260
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
261
 
262
- # Rephrase with versatile synonyms while maintaining grammatical forms
263
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
264
 
265
  # Correct spelling errors
266
  paraphrased_text = correct_spelling(paraphrased_text)
267
 
268
- return paraphrased_text
 
 
 
269
 
270
  # Gradio app setup with two tabs
271
  with gr.Blocks() as demo:
@@ -279,8 +309,8 @@ with gr.Blocks() as demo:
279
  button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
280
 
281
  with gr.Tab("Paraphrasing & Grammar Correction"):
282
- t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
283
- button2 = gr.Button("🔄 Paraphrase and Correct")
284
  result2 = gr.Textbox(lines=5, label='Corrected Text')
285
 
286
  # Connect the paraphrasing and correction function to the button
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
29
  res = pipeline_en(text)[0]
30
  return res['label'], res['score']
31
 
32
+ # Function to get synonyms using NLTK WordNet
33
  def get_synonyms_nltk(word, pos):
34
  synsets = wordnet.synsets(word, pos=pos)
35
  if synsets:
 
37
  return [lemma.name() for lemma in lemmas]
38
  return []
39
 
40
+ # Function to dynamically select the most relevant synonym
41
def get_relevant_synonym(word, pos, context):
    """Return the synonym of ``word`` that shows up most often in ``context``.

    Candidates come from ``get_synonyms_nltk(word, pos)``. Occurrences are
    counted as case-insensitive substring matches inside ``context``. The
    first candidate with the highest non-zero count wins; ``word`` itself is
    returned when there are no candidates or none of them occurs in
    ``context``.
    """
    candidates = get_synonyms_nltk(word, pos)
    if not candidates:
        return word

    haystack = context.lower()
    tallies = [haystack.count(candidate.lower()) for candidate in candidates]
    best = max(tallies)
    # list.index yields the earliest position, which matches a strict ">" scan.
    return candidates[tallies.index(best)] if best > 0 else word
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  # Function to remove redundant and meaningless words
59
  def remove_redundant_words(text):
 
151
  corrected_text.append(token.text)
152
  return ' '.join(corrected_text)
153
 
154
+ # Function to get the correct synonym while maintaining verb form
155
def replace_with_synonym(token, context):
    """Return a context-relevant synonym for ``token``, inflected to match it.

    Args:
        token: Object exposing ``pos_``, ``tag_``, ``lemma_`` and ``text``
            (presumably a spaCy token -- confirm against the caller).
        context: Text handed to ``get_relevant_synonym`` to score candidates.

    Returns:
        The selected synonym with a simple suffix appended to mirror the
        token's tag ("ing" for VBG, "ed" for VBD/VBN, "s" for VBZ and for
        NNS nouns not already ending in "s"), or ``token.text`` unchanged
        when WordNet offers no synonym or no candidate beats the original
        word.
    """
    # Unlisted parts of speech map to None, as in the original if/elif chain.
    pos = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }.get(token.pos_)

    if not get_synonyms_nltk(token.lemma_, pos):
        return token.text

    synonym = get_relevant_synonym(token.text, pos, context)

    # get_relevant_synonym hands back the word it was given when no candidate
    # occurs in the context. That word is already inflected, so adding a
    # suffix would corrupt it (e.g. "went" + "ed"). Keep the token as written.
    if synonym.lower() == token.text.lower():
        return token.text

    if token.pos_ == "VERB":
        if token.tag_ == "VBG":  # Present participle (e.g., running)
            synonym += 'ing'
        elif token.tag_ in ("VBD", "VBN"):  # Past tense or past participle
            synonym += 'ed'
        elif token.tag_ == "VBZ":  # Third-person singular present
            synonym += 's'
    elif token.pos_ == "NOUN" and token.tag_ == "NNS":  # Plural nouns
        if not synonym.endswith('s'):
            synonym += 's'
    return synonym
181
+
182
  # Function to check for and avoid double negatives
183
  def correct_double_negatives(text):
184
  doc = nlp(text)
 
212
  corrected_words.append(corrected_word)
213
  return ' '.join(corrected_words)
214
 
215
+ # Function to rephrase text and replace words with their synonyms while maintaining form
216
def rephrase_with_synonyms(text):
    """Replace content words in ``text`` with context-relevant synonyms.

    The text is parsed with the module-level ``nlp`` pipeline. Nouns, verbs,
    adjectives and adverbs are offered to ``get_relevant_synonym`` with the
    whole ``text`` as context; every other token is copied through. A chosen
    synonym receives a simple suffix mirroring the token's tag ("ing" for
    VBG, "ed" for VBD/VBN, "s" for VBZ and for NNS nouns not already ending
    in "s").

    Args:
        text: The text to rephrase.

    Returns:
        The resulting tokens joined with single spaces.
    """
    wordnet_pos = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    rephrased_text = []

    for token in nlp(text):
        pos_tag = wordnet_pos.get(token.pos_)
        if not pos_tag:
            rephrased_text.append(token.text)
            continue

        # get_relevant_synonym already returns the word unchanged when
        # WordNet has no synonyms, so no separate lookup is needed first.
        synonym = get_relevant_synonym(token.text, pos_tag, text)

        # When the original word comes back it is already inflected; adding
        # a suffix would corrupt it (e.g. "went" + "ed"). Keep it as written.
        if synonym.lower() == token.text.lower():
            rephrased_text.append(token.text)
            continue

        if token.pos_ == "VERB":
            if token.tag_ == "VBG":  # Present participle (e.g., running)
                synonym += 'ing'
            elif token.tag_ in ("VBD", "VBN"):  # Past tense or past participle
                synonym += 'ed'
            elif token.tag_ == "VBZ":  # Third-person singular present
                synonym += 's'
        elif token.pos_ == "NOUN" and token.tag_ == "NNS":  # Plural nouns
            if not synonym.endswith('s'):
                synonym += 's'
        rephrased_text.append(synonym)

    return ' '.join(rephrased_text)
251
+
252
+ # Function to retain the structure of the input text (headings, paragraphs, line breaks)
253
def retain_structure(text):
    """Mark all-caps lines of ``text`` as headings by prefixing them with "# ".

    The text is split on "\n". A line whose stripped content is upper-case
    becomes ``"# <stripped content>"``; all other lines are kept exactly as
    they are. A line that already starts with "# " is left alone, so running
    the function over its own output does not stack heading markers.

    Args:
        text: Input text, possibly spanning several lines.

    Returns:
        The processed lines joined back together with "\n".
    """
    formatted_lines = []

    for line in text.split("\n"):
        stripped = line.strip()
        # "# TITLE".isupper() is True, so without the prefix check a second
        # pass would turn an existing heading into "# # TITLE".
        if stripped.isupper() and not stripped.startswith("# "):
            formatted_lines.append(f"# {stripped}")  # Treat it as a heading
        else:
            formatted_lines.append(line)  # Paragraph, blank line or existing heading

    return "\n".join(formatted_lines)
264
+
265
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
266
  def paraphrase_and_correct(text):
267
  # Retain the structure (headings, paragraphs, line breaks)
 
286
  paraphrased_text = correct_double_negatives(paraphrased_text)
287
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
288
 
289
+ # Rephrase with synonyms while maintaining grammatical forms
290
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
291
 
292
  # Correct spelling errors
293
  paraphrased_text = correct_spelling(paraphrased_text)
294
 
295
+ # Reapply the structure to the final output
296
+ final_output = retain_structure(paraphrased_text)
297
+
298
+ return final_output
299
 
300
  # Gradio app setup with two tabs
301
  with gr.Blocks() as demo:
 
309
  button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
310
 
311
  with gr.Tab("Paraphrasing & Grammar Correction"):
312
+ t2 = gr.Textbox(lines=5, label='Enter text for Humanifying')
313
+ button2 = gr.Button("🔄 Humanifier")
314
  result2 = gr.Textbox(lines=5, label='Corrected Text')
315
 
316
  # Connect the paraphrasing and correction function to the button