sashdev committed on
Commit
48d050c
·
verified ·
1 Parent(s): 344d74a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -125
app.py CHANGED
@@ -81,128 +81,6 @@ def plagiarism_removal(text):
81
 
82
  return " ".join(corrected_text)
83
 
84
- def predict_en(text):
85
- res = pipeline_en(text)[0]
86
- return res['label'], res['score']
87
-
88
- def remove_redundant_words(text):
89
- doc = nlp(text)
90
- meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
91
- filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
92
- return ' '.join(filtered_text)
93
-
94
- def fix_punctuation_spacing(text):
95
- words = text.split(' ')
96
- cleaned_words = []
97
- punctuation_marks = {',', '.', "'", '!', '?', ':'}
98
-
99
- for word in words:
100
- if cleaned_words and word and word[0] in punctuation_marks:
101
- cleaned_words[-1] += word
102
- else:
103
- cleaned_words.append(word)
104
-
105
- return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
106
- .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
107
-
108
- def fix_possessives(text):
109
- text = re.sub(r'(\w)\s\'\s?s', r"\1's", text)
110
- return text
111
-
112
- def capitalize_sentences_and_nouns(text):
113
- doc = nlp(text)
114
- corrected_text = []
115
-
116
- for sent in doc.sents:
117
- sentence = []
118
- for token in sent:
119
- if token.i == sent.start:
120
- sentence.append(token.text.capitalize())
121
- elif token.pos_ == "PROPN":
122
- sentence.append(token.text.capitalize())
123
- else:
124
- sentence.append(token.text)
125
- corrected_text.append(' '.join(sentence))
126
-
127
- return ' '.join(corrected_text)
128
-
129
- def force_first_letter_capital(text):
130
- sentences = re.split(r'(?<=\w[.!?])\s+', text)
131
- capitalized_sentences = []
132
-
133
- for sentence in sentences:
134
- if sentence:
135
- capitalized_sentence = sentence[0].capitalize() + sentence[1:]
136
- if not re.search(r'[.!?]$', capitalized_sentence):
137
- capitalized_sentence += '.'
138
- capitalized_sentences.append(capitalized_sentence)
139
-
140
- return " ".join(capitalized_sentences)
141
-
142
- def correct_tense_errors(text):
143
- doc = nlp(text)
144
- corrected_text = []
145
- for token in doc:
146
- if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
147
- lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
148
- corrected_text.append(lemma)
149
- else:
150
- corrected_text.append(token.text)
151
- return ' '.join(corrected_text)
152
-
153
- def correct_article_errors(text):
154
- doc = nlp(text)
155
- corrected_text = []
156
- for token in doc:
157
- if token.text in ['a', 'an']:
158
- next_token = token.nbor(1)
159
- if token.text == "a" and next_token.text[0].lower() in "aeiou":
160
- corrected_text.append("an")
161
- elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
162
- corrected_text.append("a")
163
- else:
164
- corrected_text.append(token.text)
165
- else:
166
- corrected_text.append(token.text)
167
- return ' '.join(corrected_text)
168
-
169
- def ensure_subject_verb_agreement(text):
170
- doc = nlp(text)
171
- corrected_text = []
172
- for token in doc:
173
- if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
174
- if token.tag_ == "NN" and token.head.tag_ != "VBZ":
175
- corrected_text.append(token.head.lemma_ + "s")
176
- elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
177
- corrected_text.append(token.head.lemma_)
178
- corrected_text.append(token.text)
179
- return ' '.join(corrected_text)
180
-
181
- def correct_spelling(text):
182
- words = text.split()
183
- corrected_words = []
184
- for word in words:
185
- corrected_word = spell.correction(word)
186
- if corrected_word is not None:
187
- corrected_words.append(corrected_word)
188
- else:
189
- corrected_words.append(word)
190
- return ' '.join(corrected_words)
191
-
192
-
193
-
194
-
195
- def correct_grammar_with_gpt(text):
196
- # Initialize a pipeline for text generation with a suitable model for grammar correction
197
- gpt_model = pipeline("text-generation", model="gpt2") # Replace with a grammar correction model if available
198
-
199
- # Generate a corrected version of the text
200
- corrected_output = gpt_model(text, max_length=50)[0]['generated_text']
201
-
202
- return corrected_output.strip()
203
-
204
-
205
-
206
  def paraphrase_and_correct(text):
207
  paragraphs = text.split("\n\n") # Split by paragraphs
208
 
@@ -219,11 +97,10 @@ def paraphrase_and_correct(text):
219
  paraphrased_text = fix_possessives(paraphrased_text)
220
  paraphrased_text = correct_spelling(paraphrased_text)
221
  paraphrased_text = fix_punctuation_spacing(paraphrased_text)
 
222
 
223
- # Use GPT for final grammar correction
224
- final_corrected_text = correct_grammar_with_gpt(paraphrased_text)
225
 
226
- return final_corrected_text
227
  # Gradio app setup
228
  with gr.Blocks() as demo:
229
  with gr.Tab("AI Detection"):
 
81
 
82
  return " ".join(corrected_text)
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def paraphrase_and_correct(text):
85
  paragraphs = text.split("\n\n") # Split by paragraphs
86
 
 
97
  paraphrased_text = fix_possessives(paraphrased_text)
98
  paraphrased_text = correct_spelling(paraphrased_text)
99
  paraphrased_text = fix_punctuation_spacing(paraphrased_text)
100
+ processed_paragraphs.append(paraphrased_text)
101
 
102
+ return "\n\n".join(processed_paragraphs) # Reassemble the text with paragraphs
 
103
 
 
104
  # Gradio app setup
105
  with gr.Blocks() as demo:
106
  with gr.Tab("AI Detection"):