sashtech committed on
Commit
7b6fe3d
·
verified ·
1 Parent(s): 5da5cc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -78
app.py CHANGED
@@ -6,6 +6,7 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -29,7 +30,7 @@ def predict_en(text):
29
  res = pipeline_en(text)[0]
30
  return res['label'], res['score']
31
 
32
- # Function to get synonyms using NLTK WordNet
33
  def get_synonyms_nltk(word, pos):
34
  synsets = wordnet.synsets(word, pos=pos)
35
  if synsets:
@@ -37,6 +38,74 @@ def get_synonyms_nltk(word, pos):
37
  return [lemma.name() for lemma in lemmas]
38
  return []
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # Function to remove redundant and meaningless words
41
  def remove_redundant_words(text):
42
  doc = nlp(text)
@@ -133,31 +202,6 @@ def correct_article_errors(text):
133
  corrected_text.append(token.text)
134
  return ' '.join(corrected_text)
135
 
136
- # Function to get the correct synonym while maintaining verb form
137
- def replace_with_synonym(token):
138
- pos = None
139
- if token.pos_ == "VERB":
140
- pos = wordnet.VERB
141
- elif token.pos_ == "NOUN":
142
- pos = wordnet.NOUN
143
- elif token.pos_ == "ADJ":
144
- pos = wordnet.ADJ
145
- elif token.pos_ == "ADV":
146
- pos = wordnet.ADV
147
-
148
- synonyms = get_synonyms_nltk(token.lemma_, pos)
149
-
150
- if synonyms:
151
- synonym = synonyms[0]
152
- if token.tag_ == "VBG": # Present participle (e.g., running)
153
- synonym = synonym + 'ing'
154
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
155
- synonym = synonym + 'ed'
156
- elif token.tag_ == "VBZ": # Third-person singular present
157
- synonym = synonym + 's'
158
- return synonym
159
- return token.text
160
-
161
  # Function to check for and avoid double negatives
162
  def correct_double_negatives(text):
163
  doc = nlp(text)
@@ -191,57 +235,6 @@ def correct_spelling(text):
191
  corrected_words.append(corrected_word)
192
  return ' '.join(corrected_words)
193
 
194
- # Function to rephrase text and replace words with their synonyms while maintaining form
195
- def rephrase_with_synonyms(text):
196
- doc = nlp(text)
197
- rephrased_text = []
198
-
199
- for token in doc:
200
- pos_tag = None
201
- if token.pos_ == "NOUN":
202
- pos_tag = wordnet.NOUN
203
- elif token.pos_ == "VERB":
204
- pos_tag = wordnet.VERB
205
- elif token.pos_ == "ADJ":
206
- pos_tag = wordnet.ADJ
207
- elif token.pos_ == "ADV":
208
- pos_tag = wordnet.ADV
209
-
210
- if pos_tag:
211
- synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
212
- if synonyms:
213
- # Use a more dynamic approach for synonyms
214
- synonym = max(synonyms, key=lambda s: wordnet.synsets(s, pos=pos_tag)) # Select based on the number of synsets
215
- if token.pos_ == "VERB":
216
- if token.tag_ == "VBG": # Present participle (e.g., running)
217
- synonym = synonym + 'ing'
218
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
219
- synonym = synonym + 'ed'
220
- elif token.tag_ == "VBZ": # Third-person singular present
221
- synonym = synonym + 's'
222
- elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
223
- synonym += 's' if not synonym.endswith('s') else ""
224
- rephrased_text.append(synonym)
225
- else:
226
- rephrased_text.append(token.text)
227
- else:
228
- rephrased_text.append(token.text)
229
-
230
- return ' '.join(rephrased_text)
231
-
232
- # Retain the structure of the input text (headings, paragraphs, line breaks)
233
- def retain_structure(text):
234
- lines = text.split("\n")
235
- formatted_lines = []
236
-
237
- for line in lines:
238
- if line.strip().isupper(): # Heading if all caps
239
- formatted_lines.append(f"# {line.strip()}") # Treat it as a heading
240
- else:
241
- formatted_lines.append(line) # Otherwise, it's a paragraph or normal text
242
-
243
- return "\n".join(formatted_lines)
244
-
245
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
246
  def paraphrase_and_correct(text):
247
  # Retain the structure (headings, paragraphs, line breaks)
@@ -266,7 +259,7 @@ def paraphrase_and_correct(text):
266
  paraphrased_text = correct_double_negatives(paraphrased_text)
267
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
268
 
269
- # Rephrase with synonyms while maintaining grammatical forms
270
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
271
 
272
  # Correct spelling errors
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
+ import random # Import random for versatile synonym replacement
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
30
  res = pipeline_en(text)[0]
31
  return res['label'], res['score']
32
 
33
+ # Enhanced function to get synonyms using NLTK WordNet
34
  def get_synonyms_nltk(word, pos):
35
  synsets = wordnet.synsets(word, pos=pos)
36
  if synsets:
 
38
  return [lemma.name() for lemma in lemmas]
39
  return []
40
 
41
+ # Retain the structure of the input text (headings, paragraphs, line breaks)
42
def retain_structure(text):
    """Mark all-caps lines as Markdown headings and leave other lines untouched.

    The text is split on newlines. A line whose stripped content is entirely
    upper-case is rewritten as ``# <stripped line>``; every other line is kept
    exactly as it was, so paragraph breaks and blank lines survive.

    Lines that already start with ``#`` are left alone so that running the
    function on its own output (or on input that already contains Markdown
    headings) does not produce ``# # HEADING``.

    Args:
        text: The input text, possibly spanning several lines.

    Returns:
        The text with heading lines prefixed by ``# ``, joined with newlines.
    """
    formatted_lines = []

    for line in text.split("\n"):
        stripped = line.strip()
        # str.isupper() ignores non-letters, so "# TITLE" would also qualify;
        # the startswith check keeps existing headings from being re-prefixed.
        if stripped.isupper() and not stripped.startswith("#"):
            formatted_lines.append(f"# {stripped}")
        else:
            formatted_lines.append(line)

    return "\n".join(formatted_lines)
53
+
54
+ # Dynamic and versatile synonym replacement
55
def _inflect_synonym(word, tag):
    """Apply a regular English suffix to *word* for the given fine-grained tag.

    This is a spelling heuristic, not a morphological analyser: it handles the
    silent-e, consonant+y and sibilant cases but does not double final
    consonants or know irregular forms.
    """
    consonant_y = len(word) > 1 and word.endswith("y") and word[-2] not in "aeiou"

    if tag == "VBG":  # Present participle (e.g., running)
        if word.endswith("ie"):
            return word[:-2] + "ying"
        if len(word) > 2 and word.endswith("e") and not word.endswith(("ee", "oe", "ye")):
            return word[:-1] + "ing"
        return word + "ing"

    if tag in ("VBD", "VBN"):  # Past tense or past participle
        if word.endswith("e"):
            return word + "d"
        if consonant_y:
            return word[:-1] + "ied"
        return word + "ed"

    if tag in ("VBZ", "NNS"):  # Third-person singular present / plural noun
        if word.endswith(("s", "x", "z", "ch", "sh")):
            return word + "es"
        if consonant_y:
            return word[:-1] + "ies"
        return word + "s"

    return word


def replace_with_synonym(token):
    """Return a randomly chosen WordNet synonym for *token*, or its own text.

    Only verbs, nouns, adjectives and adverbs are candidates for replacement;
    any other token is returned unchanged without consulting WordNet. The
    synonym is looked up from the token's lemma, then re-inflected to match
    the token's fine-grained tag and initial capitalisation.

    Args:
        token: An object exposing ``pos_``, ``tag_``, ``lemma_`` and ``text``
            (the attributes read here; in this file these come from the
            tokens of an ``nlp(...)`` document).

    Returns:
        The replacement word, or ``token.text`` when the token is not a
        content word or no usable synonym exists.
    """
    if token.pos_ == "VERB":
        pos = wordnet.VERB
    elif token.pos_ == "NOUN":
        pos = wordnet.NOUN
    elif token.pos_ == "ADJ":
        pos = wordnet.ADJ
    elif token.pos_ == "ADV":
        pos = wordnet.ADV
    else:
        # Without a POS the WordNet lookup is unrestricted and could swap
        # function words or punctuation, so leave the token as it is.
        return token.text

    lemma = token.lemma_
    # Drop the word itself (no change) and multi-word lemmas, whose names
    # contain underscores and cannot be inflected by appending a suffix.
    candidates = [
        name
        for name in get_synonyms_nltk(lemma, pos)
        if "_" not in name and name.lower() != lemma.lower()
    ]
    if not candidates:
        return token.text

    # Randomly choose a synonym to add more versatility
    synonym = _inflect_synonym(random.choice(candidates), token.tag_)

    # Keep sentence-initial / capitalised words capitalised.
    if token.text[:1].isupper():
        synonym = synonym[:1].upper() + synonym[1:]
    return synonym
79
+
80
+ # Function to rephrase text and replace words with versatile synonyms
81
def rephrase_with_synonyms(text):
    """Rephrase *text* by swapping content words for WordNet synonyms.

    The text is parsed with the module-level ``nlp`` pipeline. Nouns, verbs,
    adjectives and adverbs are passed to ``replace_with_synonym``, which
    returns either a synonym or the token's own text; all other tokens are
    copied through unchanged.

    The previous version first checked for synonyms of ``token.text`` and then
    replaced using ``token.lemma_``, so the check and the replacement could
    disagree and every content word cost two WordNet lookups. The single call
    below relies on ``replace_with_synonym`` falling back to the original text.

    Args:
        text: The text to rephrase.

    Returns:
        The rephrased text, with each token followed by the whitespace that
        followed it in the input (so no space is inserted before punctuation).
    """
    replaceable_pos = {"NOUN", "VERB", "ADJ", "ADV"}
    pieces = []

    for token in nlp(text):
        if token.pos_ in replaceable_pos:
            word = replace_with_synonym(token)
        else:
            word = token.text
        # whitespace_ is the token's trailing whitespace ("" or " "); using it
        # instead of ' '.join avoids output like "Hello , world ."
        pieces.append(word + token.whitespace_)

    return "".join(pieces)
108
+
109
  # Function to remove redundant and meaningless words
110
  def remove_redundant_words(text):
111
  doc = nlp(text)
 
202
  corrected_text.append(token.text)
203
  return ' '.join(corrected_text)
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  # Function to check for and avoid double negatives
206
  def correct_double_negatives(text):
207
  doc = nlp(text)
 
235
  corrected_words.append(corrected_word)
236
  return ' '.join(corrected_words)
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
239
  def paraphrase_and_correct(text):
240
  # Retain the structure (headings, paragraphs, line breaks)
 
259
  paraphrased_text = correct_double_negatives(paraphrased_text)
260
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
261
 
262
+ # Rephrase with versatile synonyms while maintaining grammatical forms
263
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
264
 
265
  # Correct spelling errors