sashtech committed on
Commit
b6e297c
·
verified ·
1 Parent(s): 124e989

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -98
app.py CHANGED
@@ -6,7 +6,6 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
- import random # Import random for versatile synonym replacement
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -30,7 +29,7 @@ def predict_en(text):
30
  res = pipeline_en(text)[0]
31
  return res['label'], res['score']
32
 
33
- # Enhanced function to get synonyms using NLTK WordNet
34
  def get_synonyms_nltk(word, pos):
35
  synsets = wordnet.synsets(word, pos=pos)
36
  if synsets:
@@ -38,74 +37,6 @@ def get_synonyms_nltk(word, pos):
38
  return [lemma.name() for lemma in lemmas]
39
  return []
40
 
41
- # Retain the structure of the input text (headings, paragraphs, line breaks)
42
- def retain_structure(text):
43
- lines = text.split("\n")
44
- formatted_lines = []
45
-
46
- for line in lines:
47
- if line.strip().isupper(): # Heading if all caps
48
- formatted_lines.append(f"# {line.strip()}") # Treat it as a heading
49
- else:
50
- formatted_lines.append(line) # Otherwise, it's a paragraph or normal text
51
-
52
- return "\n".join(formatted_lines)
53
-
54
- # Dynamic and versatile synonym replacement
55
- def replace_with_synonym(token):
56
- pos = None
57
- if token.pos_ == "VERB":
58
- pos = wordnet.VERB
59
- elif token.pos_ == "NOUN":
60
- pos = wordnet.NOUN
61
- elif token.pos_ == "ADJ":
62
- pos = wordnet.ADJ
63
- elif token.pos_ == "ADV":
64
- pos = wordnet.ADV
65
-
66
- synonyms = get_synonyms_nltk(token.lemma_, pos)
67
-
68
- if synonyms:
69
- # Randomly choose a synonym to add more versatility
70
- synonym = random.choice(synonyms)
71
- if token.tag_ == "VBG": # Present participle (e.g., running)
72
- synonym = synonym + 'ing'
73
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
74
- synonym = synonym + 'ed'
75
- elif token.tag_ == "VBZ": # Third-person singular present
76
- synonym = synonym + 's'
77
- return synonym
78
- return token.text
79
-
80
- # Function to rephrase text and replace words with versatile synonyms
81
- def rephrase_with_synonyms(text):
82
- doc = nlp(text)
83
- rephrased_text = []
84
-
85
- for token in doc:
86
- pos_tag = None
87
- if token.pos_ == "NOUN":
88
- pos_tag = wordnet.NOUN
89
- elif token.pos_ == "VERB":
90
- pos_tag = wordnet.VERB
91
- elif token.pos_ == "ADJ":
92
- pos_tag = wordnet.ADJ
93
- elif token.pos_ == "ADV":
94
- pos_tag = wordnet.ADV
95
-
96
- if pos_tag:
97
- synonyms = get_synonyms_nltk(token.text, pos_tag)
98
- if synonyms:
99
- # Use the dynamic synonym replacement for versatility
100
- synonym = replace_with_synonym(token)
101
- rephrased_text.append(synonym)
102
- else:
103
- rephrased_text.append(token.text)
104
- else:
105
- rephrased_text.append(token.text)
106
-
107
- return ' '.join(rephrased_text)
108
-
109
  # Function to remove redundant and meaningless words
110
  def remove_redundant_words(text):
111
  doc = nlp(text)
@@ -131,26 +62,12 @@ def capitalize_sentences_and_nouns(text):
131
 
132
  return ' '.join(corrected_text)
133
 
134
- # Function to force capitalization of the first letter of every sentence
135
  def force_first_letter_capital(text):
136
  sentences = text.split(". ") # Split by period to get each sentence
137
  capitalized_sentences = [sentence[0].capitalize() + sentence[1:] if sentence else "" for sentence in sentences]
138
  return ". ".join(capitalized_sentences)
139
 
140
- # Function to handle possessive 's and retain original meaning
141
- def handle_possessives(text):
142
- doc = nlp(text)
143
- corrected_text = []
144
-
145
- for token in doc:
146
- # If token is a possessive form (e.g., 'Earth's'), retain its original form
147
- if token.text.endswith("'s") or token.text == "'s":
148
- corrected_text.append(token.text) # Keep it as is, even if a synonym is found
149
- else:
150
- corrected_text.append(token.text)
151
-
152
- return ' '.join(corrected_text)
153
-
154
  # Function to correct tense errors in a sentence
155
  def correct_tense_errors(text):
156
  doc = nlp(text)
@@ -202,6 +119,31 @@ def correct_article_errors(text):
202
  corrected_text.append(token.text)
203
  return ' '.join(corrected_text)
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  # Function to check for and avoid double negatives
206
  def correct_double_negatives(text):
207
  doc = nlp(text)
@@ -226,25 +168,56 @@ def ensure_subject_verb_agreement(text):
226
  corrected_text.append(token.text)
227
  return ' '.join(corrected_text)
228
 
229
- # Function to correct spelling errors and handle None cases
230
  def correct_spelling(text):
231
  words = text.split()
232
  corrected_words = []
233
  for word in words:
234
  corrected_word = spell.correction(word)
235
- # If spell.correction returns None, use the original word
236
- if corrected_word is None:
237
- corrected_word = word
238
  corrected_words.append(corrected_word)
239
  return ' '.join(corrected_words)
240
 
241
- # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  def paraphrase_and_correct(text):
243
- # Retain the structure (headings, paragraphs, line breaks)
244
- structured_text = retain_structure(text)
245
-
246
  # Remove meaningless or redundant words first
247
- cleaned_text = remove_redundant_words(structured_text)
248
 
249
  # Capitalize sentences and nouns
250
  paraphrased_text = capitalize_sentences_and_nouns(cleaned_text)
@@ -252,9 +225,6 @@ def paraphrase_and_correct(text):
252
  # Ensure first letter of each sentence is capitalized
253
  paraphrased_text = force_first_letter_capital(paraphrased_text)
254
 
255
- # Handle possessives properly
256
- paraphrased_text = handle_possessives(paraphrased_text)
257
-
258
  # Apply grammatical corrections
259
  paraphrased_text = correct_article_errors(paraphrased_text)
260
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
@@ -262,7 +232,7 @@ def paraphrase_and_correct(text):
262
  paraphrased_text = correct_double_negatives(paraphrased_text)
263
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
264
 
265
- # Rephrase with versatile synonyms while maintaining grammatical forms
266
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
267
 
268
  # Correct spelling errors
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
29
  res = pipeline_en(text)[0]
30
  return res['label'], res['score']
31
 
32
+ # Function to get synonyms using NLTK WordNet
33
  def get_synonyms_nltk(word, pos):
34
  synsets = wordnet.synsets(word, pos=pos)
35
  if synsets:
 
37
  return [lemma.name() for lemma in lemmas]
38
  return []
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # Function to remove redundant and meaningless words
41
  def remove_redundant_words(text):
42
  doc = nlp(text)
 
62
 
63
  return ' '.join(corrected_text)
64
 
65
# Function to force capitalization of the first letter of every sentence
def force_first_letter_capital(text):
    """Capitalize the first letter of the text and of every sentence in it.

    A sentence starts at the first non-whitespace character of the text and
    after any '.', '!' or '?' that is followed by whitespace. Everything
    else (spacing, line breaks, remaining letters) is returned unchanged.

    Args:
        text: The string to process; may be empty.

    Returns:
        A copy of ``text`` with each sentence-initial character capitalized.
    """
    characters = list(text)
    sentence_start_pending = True  # the text itself opens a sentence
    for index, char in enumerate(characters):
        if char.isspace():
            # Whitespace directly after a terminator opens a new sentence;
            # requiring the whitespace keeps "3.14" and "example.com" intact.
            if index and characters[index - 1] in ".!?":
                sentence_start_pending = True
            continue
        if sentence_start_pending:
            characters[index] = char.capitalize()
            sentence_start_pending = False
    return "".join(characters)
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # Function to correct tense errors in a sentence
72
  def correct_tense_errors(text):
73
  doc = nlp(text)
 
119
  corrected_text.append(token.text)
120
  return ' '.join(corrected_text)
121
 
122
# Function to get the correct synonym while maintaining verb form
def replace_with_synonym(token):
    """Return a WordNet synonym for ``token``, suffixed to match its verb tag.

    Args:
        token: An object exposing ``pos_``, ``tag_``, ``lemma_`` and ``text``
            (presumably a spaCy ``Token`` -- confirm against the caller).

    Returns:
        The first usable synonym from ``get_synonyms_nltk`` with a regular
        suffix appended for VBG / VBD / VBN / VBZ tags, or ``token.text``
        unchanged when the token is not a verb, noun, adjective or adverb,
        or when no usable synonym is found.
    """
    if token.pos_ == "VERB":
        pos = wordnet.VERB
    elif token.pos_ == "NOUN":
        pos = wordnet.NOUN
    elif token.pos_ == "ADJ":
        pos = wordnet.ADJ
    elif token.pos_ == "ADV":
        pos = wordnet.ADV
    else:
        # Leave every other token alone. Forwarding pos=None to WordNet would
        # search all parts of speech and could substitute an unrelated word.
        return token.text

    own_forms = (token.lemma_.lower(), token.text.lower())
    synonym = None
    for candidate in get_synonyms_nltk(token.lemma_, pos):
        # Skip the word itself (it is often the first lemma returned) and
        # multi-word lemmas, which WordNet spells with underscores.
        if "_" in candidate or candidate.lower() in own_forms:
            continue
        synonym = candidate
        break

    if synonym is None:
        return token.text

    # NOTE: suffixing is purely regular; irregular verbs and spelling changes
    # (e.g. dropping a final 'e') are not handled here.
    if token.tag_ == "VBG":  # Present participle (e.g., running)
        synonym += 'ing'
    elif token.tag_ in ("VBD", "VBN"):  # Past tense or past participle
        synonym += 'ed'
    elif token.tag_ == "VBZ":  # Third-person singular present
        synonym += 's'

    # Keep a leading capital so sentence-initial words stay capitalized.
    if token.text[:1].isupper():
        synonym = synonym[:1].upper() + synonym[1:]
    return synonym
146
+
147
  # Function to check for and avoid double negatives
148
  def correct_double_negatives(text):
149
  doc = nlp(text)
 
168
  corrected_text.append(token.text)
169
  return ' '.join(corrected_text)
170
 
171
# Function to correct spelling errors
def correct_spelling(text):
    """Spell-correct ``text`` word by word.

    The text is split on whitespace, each piece is passed to
    ``spell.correction`` and the results are re-joined with single spaces.

    Args:
        text: The string to correct.

    Returns:
        The corrected words joined by single spaces. A word for which the
        spell checker offers no correction is kept as written.
    """
    corrected_words = []
    for word in text.split():
        suggestion = spell.correction(word)
        # correction() may return None when it has no candidate; joining a
        # None would raise TypeError, so fall back to the original word.
        corrected_words.append(word if suggestion is None else suggestion)
    return ' '.join(corrected_words)
179
 
180
# Function to rephrase text and replace words with their synonyms while maintaining form
def rephrase_with_synonyms(text):
    """Replace nouns, verbs, adjectives and adverbs in ``text`` with synonyms.

    Each token produced by ``nlp(text)`` is looked up through
    ``get_synonyms_nltk``. The first usable synonym receives a regular
    suffix matching the token's tag (verbs: VBG / VBD / VBN / VBZ; nouns:
    NNS) and inherits the token's leading capital. Tokens of any other part
    of speech, and tokens without a usable synonym, are kept as written.

    Args:
        text: The string to rephrase.

    Returns:
        The resulting tokens joined by single spaces.
    """
    wordnet_pos_by_tag = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    rephrased_text = []

    for token in nlp(text):
        pos_tag = wordnet_pos_by_tag.get(token.pos_)

        synonym = None
        if pos_tag:
            own_forms = (token.text.lower(), token.lemma_.lower())
            for candidate in get_synonyms_nltk(token.text, pos_tag):
                # Skip the word itself (often the first lemma returned) and
                # multi-word lemmas, which WordNet spells with underscores.
                if "_" in candidate or candidate.lower() in own_forms:
                    continue
                synonym = candidate
                break

        if synonym is None:
            rephrased_text.append(token.text)
            continue

        # NOTE: suffixing is purely regular; irregular forms are not handled.
        if token.pos_ == "VERB":
            if token.tag_ == "VBG":  # Present participle (e.g., running)
                synonym += 'ing'
            elif token.tag_ in ("VBD", "VBN"):  # Past tense or past participle
                synonym += 'ed'
            elif token.tag_ == "VBZ":  # Third-person singular present
                synonym += 's'
        elif token.pos_ == "NOUN" and token.tag_ == "NNS":  # Plural nouns
            if not synonym.endswith('s'):
                synonym += 's'

        # Keep a leading capital so sentence-initial words stay capitalized.
        if token.text[:1].isupper():
            synonym = synonym[:1].upper() + synonym[1:]
        rephrased_text.append(synonym)

    return ' '.join(rephrased_text)
216
+
217
+ # Function to paraphrase and correct grammar with enhanced accuracy
218
  def paraphrase_and_correct(text):
 
 
 
219
  # Remove meaningless or redundant words first
220
+ cleaned_text = remove_redundant_words(text)
221
 
222
  # Capitalize sentences and nouns
223
  paraphrased_text = capitalize_sentences_and_nouns(cleaned_text)
 
225
  # Ensure first letter of each sentence is capitalized
226
  paraphrased_text = force_first_letter_capital(paraphrased_text)
227
 
 
 
 
228
  # Apply grammatical corrections
229
  paraphrased_text = correct_article_errors(paraphrased_text)
230
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
 
232
  paraphrased_text = correct_double_negatives(paraphrased_text)
233
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
234
 
235
+ # Rephrase with synonyms while maintaining grammatical forms
236
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
237
 
238
  # Correct spelling errors