sashtech committed on
Commit
04919b2
·
verified ·
1 Parent(s): fa69dbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -14
app.py CHANGED
@@ -1,11 +1,40 @@
1
- # Added more redundant/filler words
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  def remove_redundant_words(text):
3
  doc = nlp(text)
4
- meaningless_words = {"actually", "basically", "literally", "really", "very", "just", "quite", "rather", "simply", "that", "kind of", "sort of", "you know", "honestly", "seriously"}
 
5
  filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
6
  return ' '.join(filtered_text)
7
 
8
- # Capitalize sentences and proper nouns
9
  def capitalize_sentences_and_nouns(text):
10
  doc = nlp(text)
11
  corrected_text = []
@@ -19,7 +48,7 @@ def capitalize_sentences_and_nouns(text):
19
  corrected_text.append(' '.join(sentence))
20
  return ' '.join(corrected_text)
21
 
22
- # Function to dynamically correct tenses and verb forms
23
  def correct_tense_errors(text):
24
  doc = nlp(text)
25
  corrected_text = []
@@ -31,7 +60,7 @@ def correct_tense_errors(text):
31
  corrected_text.append(token.text)
32
  return ' '.join(corrected_text)
33
 
34
- # Enhanced function to handle subject-verb agreement
35
  def ensure_subject_verb_agreement(text):
36
  doc = nlp(text)
37
  corrected_text = []
@@ -47,19 +76,93 @@ def ensure_subject_verb_agreement(text):
47
  corrected_text.append(token.text)
48
  return ' '.join(corrected_text)
49
 
50
- # Ensure proper apostrophe usage and possessives
51
  def correct_apostrophes(text):
52
  text = re.sub(r"\b(\w+)s\b(?<!\'s)", r"\1's", text) # Simple apostrophe correction
53
  text = re.sub(r"\b(\w+)s'\b", r"\1s'", text) # Handles plural possessives
54
  return text
55
 
56
- # Enhanced punctuation
57
  def enhance_punctuation(text):
58
  text = re.sub(r'\s+([?.!,";:])', r'\1', text) # Remove extra space before punctuation
59
  text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text) # Add space after punctuation if needed
 
 
 
60
  return text
61
 
62
- # Paraphrasing using synonyms and correcting semantic errors
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def rephrase_with_synonyms(text):
64
  doc = nlp(text)
65
  rephrased_text = []
@@ -88,26 +191,39 @@ def rephrase_with_synonyms(text):
88
 
89
  return ' '.join(rephrased_text)
90
 
91
- # Comprehensive text correction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def paraphrase_and_correct(text):
93
  text = enhanced_spell_check(text)
94
  text = remove_redundant_words(text)
95
  text = capitalize_sentences_and_nouns(text)
96
  text = correct_tense_errors(text)
97
- text = correct_singular_plural_errors(text)
98
- text = correct_article_errors(text)
99
  text = enhance_punctuation(text)
100
  text = correct_apostrophes(text)
 
101
  text = rephrase_with_synonyms(text)
102
- text = correct_double_negatives(text)
103
- text = ensure_subject_verb_agreement(text)
104
  return text
105
 
106
- # Integrate with Gradio UI
107
  def gradio_interface(text):
108
  corrected_text = paraphrase_and_correct(text)
109
  return corrected_text
110
 
 
111
  iface = gr.Interface(
112
  fn=gradio_interface,
113
  inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
@@ -115,5 +231,6 @@ iface = gr.Interface(
115
  title="Grammar & Semantic Error Correction",
116
  )
117
 
 
118
  if __name__ == "__main__":
119
  iface.launch()
 
1
+ import os
2
+ import gradio as gr
3
+ from transformers import pipeline
4
+ import spacy
5
+ import nltk
6
+ from nltk.corpus import wordnet
7
+ from spellchecker import SpellChecker
8
+ import re
9
+ import inflect
10
+
11
+ # Initialize components
12
def _load_spacy_model(model_name="en_core_web_sm"):
    """Load the spaCy model, downloading it first if it is not installed."""
    try:
        return spacy.load(model_name)
    except OSError:
        # spacy.load raised OSError: fetch the model package, then retry once.
        print("Downloading spaCy model...")
        spacy.cli.download(model_name)
        return spacy.load(model_name)


# Shared spaCy pipeline used by the token-based correction functions
nlp = _load_spacy_model()

# Initialize the spell checker
spell = SpellChecker()

# Initialize the inflect engine for pluralization
inflect_engine = inflect.engine()

# Ensure necessary NLTK data is downloaded
for _nltk_resource in ('wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource, quiet=True)
28
+
29
+ # Function to remove redundant/filler words
30
def remove_redundant_words(text):
    """Strip filler words and filler phrases from *text*.

    Multi-word fillers ("kind of", "sort of", "you know") are removed with a
    regex before tokenisation: a single token's text can never equal a
    phrase containing a space, so a token-level membership test cannot
    catch them.

    Args:
        text: Input string.

    Returns:
        The remaining tokens joined with single spaces. Punctuation tokens
        are therefore separated from the preceding word by a space.
    """
    filler_words = {"actually", "basically", "literally", "really", "very", "just", "quite", "rather", "simply",
                    "that", "honestly", "seriously"}
    filler_phrases = ("kind of", "sort of", "you know")

    # Build one alternation that tolerates any run of whitespace inside a phrase.
    phrase_pattern = '|'.join(
        r'\s+'.join(re.escape(part) for part in phrase.split())
        for phrase in filler_phrases
    )
    # Also swallow whitespace after the phrase so no double space is left behind.
    text = re.sub(r'\b(?:' + phrase_pattern + r')\b\s*', '', text, flags=re.IGNORECASE)

    doc = nlp(text)
    kept_tokens = [token.text for token in doc if token.text.lower() not in filler_words]
    return ' '.join(kept_tokens)
36
 
37
+ # Function to capitalize sentences and proper nouns
38
  def capitalize_sentences_and_nouns(text):
39
  doc = nlp(text)
40
  corrected_text = []
 
48
  corrected_text.append(' '.join(sentence))
49
  return ' '.join(corrected_text)
50
 
51
+ # Function to correct verb tenses
52
  def correct_tense_errors(text):
53
  doc = nlp(text)
54
  corrected_text = []
 
60
  corrected_text.append(token.text)
61
  return ' '.join(corrected_text)
62
 
63
+ # Function to ensure subject-verb agreement
64
  def ensure_subject_verb_agreement(text):
65
  doc = nlp(text)
66
  corrected_text = []
 
76
  corrected_text.append(token.text)
77
  return ' '.join(corrected_text)
78
 
79
+ # Function to correct apostrophe usage
80
def correct_apostrophes(text):
    """Re-attach possessive and contraction suffixes detached by whitespace.

    Text that has been tokenised and re-joined with spaces contains
    fragments such as ``John 's`` or ``do n't``; these are merged back into
    ``John's`` and ``don't``.

    Ordinary words ending in "s" are left untouched. A regex cannot tell a
    plural ("cats") or a verb ("is") from a possessive that is missing its
    apostrophe, so no apostrophe is ever inserted into an existing word.

    Args:
        text: Input string.

    Returns:
        The text with detached ``'s``, ``n't``, ``'re``, ``'ve``, ``'ll``,
        ``'d`` and ``'m`` suffixes joined to the preceding word.
    """
    # (?<=\w) requires a word before the gap; the trailing \b keeps a quoted
    # word such as 'sure' from being mistaken for the suffix 's.
    return re.sub(
        r"(?<=\w)\s+(n't|'(?:s|re|ve|ll|d|m))\b",
        r"\1",
        text,
        flags=re.IGNORECASE,
    )
84
 
85
+ # Function to enhance punctuation usage
86
def enhance_punctuation(text):
    """Normalise spacing around punctuation and capitalise sentence starts.

    Steps, in order:
      1. Remove whitespace before ``? . ! , ; :``.
      2. Insert a space after those marks when a letter follows directly.
         Digits are skipped so "3.14" and "1,000" stay intact.
      3. Trim whitespace just inside a pair of double quotes.
      4. Upper-case a lowercase letter that follows ``. ! ?`` and whitespace.

    No period is ever inserted before a capitalised word: a capital in the
    middle of a sentence is usually a proper noun, not a new sentence.

    NOTE: dotted tokens made of letters (URLs, "e.g.") are still split by
    step 2.

    Args:
        text: Input string.

    Returns:
        The cleaned-up string with leading/trailing whitespace stripped.
    """
    # 1. "word ," -> "word,"  (quotes excluded: an opening quote needs its space)
    text = re.sub(r'\s+([?.!,;:])', r'\1', text)
    # 2. "end.Next" -> "end. Next"; [^\W\d_] matches letters only
    text = re.sub(r'([?.!,;:])(?=[^\W\d_])', r'\1 ', text)
    # 3. '" hi "' -> '"hi"'
    text = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', text)
    # 4. ". then" -> ". Then"
    text = re.sub(r'([.!?])\s+([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
    return text.strip()
93
 
94
+ # Function to correct semantic errors and replace with more appropriate words
95
# Lower-case word -> preferred replacement. Built once at import time.
_SEMANTIC_CORRECTIONS = {
    "animate_being": "animal",
    "little": "smallest",
    "big": "largest",
    "mammalian": "mammals",
    "universe": "world",
    "manner": "ways",
    "continue": "preserve",
    "dirt": "soil",
    "wellness": "health",
    "modulate": "regulate",
    "clime": "climate",
    "function": "role",
    "keeping": "maintaining",
    "lend": "contribute",
    "better": "improve",
    "cardinal": "key",
    "expeditiously": "efficiently",
    "marauder": "predator",
    "quarry": "prey",
    "forestalling": "preventing",
    "bend": "turn",
    "works": "plant",
    "croping": "grazing",
    "flora": "vegetation",
    "dynamical": "dynamic",
    "alteration": "change",
    "add-on": "addition",
    "indispensable": "essential",
    "nutrient": "food",
    "harvest": "crops",
    "pollenateing": "pollinating",
    "divers": "diverse",
    "beginning": "source",
    "homo": "humans",
    "fall_in": "collapse",
    "takeing": "leading",
    "coinage": "species",
    "trust": "rely",
    "angleworm": "earthworm",
    "interrupt": "break",
    "affair": "matter",
    "air_out": "aerate",
    "alimentary": "nutrient",
    "distributeed": "spread",
    "country": "areas",
    "reconstruct": "restore",
    "debauched": "degraded",
    "giant": "whales",
    "organic_structure": "bodies",
    "decease": "die",
    "carcase": "carcasses",
    "pin_downing": "trapping",
    "cut_downs": "reduces",
    "ambiance": "atmosphere",
    "extenuateing": "mitigating",
    "decision": "conclusion",
    "doing": "making",
    "prolongs": "sustains",
    "home_ground": "habitats",
    "continueing": "preserving",
    "populateing": "living",
    "beingness": "beings",
}

# Splits a whitespace-delimited token into (leading punctuation, core,
# trailing punctuation). "_" and inner "-" stay in the core because some
# table keys contain them.
_WORD_PARTS = re.compile(r'^(\W*)(.*?)(\W*)$')


def correct_semantic_errors(text):
    """Replace known awkward words with their preferred alternatives.

    Each whitespace-separated token is looked up case-insensitively after
    its surrounding punctuation is set aside, so "dirt." and "(Dirt)" are
    matched just like "dirt". The punctuation is put back around the
    replacement, and a capitalised original yields a capitalised
    replacement.

    Args:
        text: Input string.

    Returns:
        The text with replacements applied, tokens joined by single spaces.
    """
    corrected_words = []
    for word in text.split():
        leading, core, trailing = _WORD_PARTS.match(word).groups()
        replacement = _SEMANTIC_CORRECTIONS.get(core.lower())
        if replacement is None:
            corrected_words.append(word)
            continue
        if core[:1].isupper():
            # Keep sentence-initial / title capitalisation.
            replacement = replacement[:1].upper() + replacement[1:]
        corrected_words.append(leading + replacement + trailing)
    return ' '.join(corrected_words)
164
+
165
+ # Function to rephrase using synonyms and adjust verb forms
166
  def rephrase_with_synonyms(text):
167
  doc = nlp(text)
168
  rephrased_text = []
 
191
 
192
  return ' '.join(rephrased_text)
193
 
194
+ # Function to apply enhanced spell check
195
def _spell_correct_fragment(fragment):
    """Spell-correct one punctuation-free fragment, keeping a leading capital."""
    # Nothing to check for empty pieces; tokens with digits (years, ids,
    # measurements) are not dictionary words and are left alone.
    if not fragment or any(ch.isdigit() for ch in fragment):
        return fragment
    # correction() may return None when it has no candidate.
    suggestion = spell.correction(fragment) or fragment
    if fragment[0].isupper() and suggestion.islower():
        suggestion = suggestion[0].upper() + suggestion[1:]
    return suggestion


def enhanced_spell_check(text):
    """Spell-check every whitespace-separated word of *text*.

    Leading and trailing punctuation is set aside before the lookup and
    restored afterwards, so the spell checker only sees the word itself and
    cannot "correct" punctuation away. Words containing underscores are
    split on "_" and each part is checked separately, then re-joined.

    Args:
        text: Input string.

    Returns:
        The corrected words joined by single spaces.
    """
    corrected_words = []
    for word in text.split():
        # \W excludes "_", so underscore compounds stay inside the core.
        leading, core, trailing = re.match(r'^(\W*)(.*?)(\W*)$', word).groups()
        fixed_core = '_'.join(_spell_correct_fragment(part) for part in core.split('_'))
        corrected_words.append(leading + fixed_core + trailing)
    return ' '.join(corrected_words)
207
+
208
+ # Comprehensive function to correct the entire text
209
def paraphrase_and_correct(text):
    """Run the full correction pipeline over *text* and return the result.

    The word-level passes run first. Spacing-sensitive clean-up
    (punctuation, apostrophes) runs last, for two reasons:
    ``rephrase_with_synonyms`` returns its tokens joined with spaces, which
    would undo any spacing fixed earlier; and ``correct_semantic_errors``
    looks words up by whitespace splitting, which works best before
    punctuation is attached to them.

    Args:
        text: Raw user input.

    Returns:
        The corrected and paraphrased text.
    """
    # Word-level corrections
    text = enhanced_spell_check(text)
    text = remove_redundant_words(text)
    text = capitalize_sentences_and_nouns(text)
    text = correct_tense_errors(text)
    text = ensure_subject_verb_agreement(text)
    text = correct_semantic_errors(text)
    text = rephrase_with_synonyms(text)

    # Surface clean-up: must follow the last step that re-joins tokens
    text = enhance_punctuation(text)
    text = correct_apostrophes(text)
    return text
220
 
221
+ # Gradio interface function
222
def gradio_interface(text):
    """Gradio callback: return the corrected form of the submitted text."""
    return paraphrase_and_correct(text)
225
 
226
+ # Setting up Gradio interface
227
  iface = gr.Interface(
228
  fn=gradio_interface,
229
  inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
 
231
  title="Grammar & Semantic Error Correction",
232
  )
233
 
234
+ # Run the Gradio interface
235
  if __name__ == "__main__":
236
  iface.launch()