sashtech committed on
Commit 55748cc · verified · 1 Parent(s): 1cf3f25

Update app.py

Files changed (1)
  1. app.py +102 -57
app.py CHANGED
@@ -25,58 +25,15 @@ except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
 nlp = spacy.load("en_core_web_sm")
 
-# Function to get synonyms using NLTK WordNet and maintain original verb form
-def get_synonym(word, pos_tag, original_token):
-    synsets = wordnet.synsets(word)
-    if not synsets:
-        return word
-
-    for synset in synsets:
-        if synset.pos() == pos_tag:  # Match the part of speech
-            synonym = synset.lemmas()[0].name()
-
-            # Preserve the original verb form
-            if original_token.tag_ in ["VBG", "VBN"]:  # Present or past participle
-                return spacy_token_form(synonym, original_token.tag_)
-            elif original_token.tag_ in ["VBZ"]:  # 3rd person singular
-                return synonym + "s"
-            else:
-                return synonym
-
-    return word
-
-# Function to conjugate the synonym to the correct form based on the original token's tag
-def spacy_token_form(synonym, tag):
-    if tag == "VBG":  # Gerund or present participle
-        return synonym + "ing" if not synonym.endswith("ing") else synonym
-    elif tag == "VBN":  # Past participle
-        return synonym + "ed" if not synonym.endswith("ed") else synonym
-    return synonym
-
-# Function to rephrase text and replace words with their synonyms while maintaining form
-def rephrase_with_synonyms(text):
-    doc = nlp(text)
-    rephrased_text = []
-
-    for token in doc:
-        # Get the correct POS tag for WordNet
-        pos_tag = None
-        if token.pos_ == "NOUN":
-            pos_tag = wordnet.NOUN
-        elif token.pos_ == "VERB":
-            pos_tag = wordnet.VERB
-        elif token.pos_ == "ADJ":
-            pos_tag = wordnet.ADJ
-        elif token.pos_ == "ADV":
-            pos_tag = wordnet.ADV
-
-        if pos_tag:
-            synonym = get_synonym(token.text, pos_tag, token)
-            rephrased_text.append(synonym)
-        else:
-            rephrased_text.append(token.text)
-
-    return ' '.join(rephrased_text)
+# Function to get synonyms using NLTK WordNet (Humanifier)
+def get_synonyms_nltk(word, pos):
+    synsets = wordnet.synsets(word, pos=pos)
+    if synsets:
+        lemmas = synsets[0].lemmas()
+        return [lemma.name() for lemma in lemmas]
+    return []
+
+# Function to capitalize the first letter of sentences and proper nouns (Humanifier)
 def capitalize_sentences_and_nouns(text):
     doc = nlp(text)
     corrected_text = []
@@ -94,6 +51,88 @@ def capitalize_sentences_and_nouns(text):
 
     return ' '.join(corrected_text)
 
+# Function to correct tense errors in a sentence (Tense Correction)
+def correct_tense_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+    for token in doc:
+        # Check for tense correction based on modal verbs
+        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
+            # Replace with appropriate verb form
+            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
+            corrected_text.append(lemma)
+        else:
+            corrected_text.append(token.text)
+    return ' '.join(corrected_text)
+
+# Function to correct singular/plural errors (Singular/Plural Correction)
+def correct_singular_plural_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+
+    for token in doc:
+        if token.pos_ == "NOUN":
+            # Check if the noun is singular or plural
+            if token.tag_ == "NN":  # Singular noun
+                # Look for determiners like "many", "several", "few" to correct to plural
+                if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
+                    corrected_text.append(token.lemma_ + 's')
+                else:
+                    corrected_text.append(token.text)
+            elif token.tag_ == "NNS":  # Plural noun
+                # Look for determiners like "a", "one" to correct to singular
+                if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
+                    corrected_text.append(token.lemma_)
+                else:
+                    corrected_text.append(token.text)
+        else:
+            corrected_text.append(token.text)
+
+    return ' '.join(corrected_text)
+
+# Function to check and correct article errors
+def correct_article_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+    for token in doc:
+        if token.text in ['a', 'an']:
+            next_token = token.nbor(1)
+            if token.text == "a" and next_token.text[0].lower() in "aeiou":
+                corrected_text.append("an")
+            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
+                corrected_text.append("a")
+            else:
+                corrected_text.append(token.text)
+        else:
+            corrected_text.append(token.text)
+    return ' '.join(corrected_text)
+
+# Function to get the correct synonym while maintaining verb form
+def replace_with_synonym(token):
+    pos = None
+    if token.pos_ == "VERB":
+        pos = wordnet.VERB
+    elif token.pos_ == "NOUN":
+        pos = wordnet.NOUN
+    elif token.pos_ == "ADJ":
+        pos = wordnet.ADJ
+    elif token.pos_ == "ADV":
+        pos = wordnet.ADV
+
+    synonyms = get_synonyms_nltk(token.lemma_, pos)
+
+    if synonyms:
+        synonym = synonyms[0]
+        # Ensure the correct grammatical form is maintained
+        if token.tag_ == "VBG":  # Present participle (e.g., running)
+            synonym = synonym + 'ing'
+        elif token.tag_ == "VBD" or token.tag_ == "VBN":  # Past tense or past participle
+            synonym = synonym + 'ed'
+        elif token.tag_ == "VBZ":  # Third-person singular present
+            synonym = synonym + 's'
+        return synonym
+    return token.text
+
 # Function to paraphrase and correct grammar
 def paraphrase_and_correct(text):
     paraphrased_text = capitalize_sentences_and_nouns(text)  # Capitalize first to ensure proper noun capitalization
@@ -102,11 +141,17 @@ def paraphrase_and_correct(text):
     paraphrased_text = correct_article_errors(paraphrased_text)
     paraphrased_text = correct_singular_plural_errors(paraphrased_text)
     paraphrased_text = correct_tense_errors(paraphrased_text)
+
+    # Replace words with synonyms while maintaining verb form
+    doc = nlp(paraphrased_text)
+    final_text = []
+    for token in doc:
+        if token.pos_ in {"VERB", "NOUN", "ADJ", "ADV"}:
+            final_text.append(replace_with_synonym(token))
+        else:
+            final_text.append(token.text)
 
-    # Rephrase with synonyms while maintaining grammatical forms
-    paraphrased_text = rephrase_with_synonyms(paraphrased_text)
-
-    return paraphrased_text
+    return ' '.join(final_text)
 
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
@@ -128,4 +173,4 @@ with gr.Blocks() as demo:
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 
 # Launch the app with the remaining functionalities
-demo.launch(share=True)
+demo.launch()
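
For anyone trying the updated pipeline outside the Gradio UI, a minimal smoke test is sketched below. It assumes the same setup as the top of app.py (the en_core_web_sm spaCy model and NLTK's WordNet corpus already downloaded, with nlp loaded); the sample strings are illustrative only and are not part of this commit.

    # Hypothetical smoke test (not part of app.py): run the new pipeline end to end.
    sample = "a apple and an orange was bought by the boys"
    print(paraphrase_and_correct(sample))

    # The individual passes added in this commit can also be called on their own:
    print(correct_article_errors("a apple and an orange"))   # article pass in isolation
    print(get_synonyms_nltk("quick", wordnet.ADJ))           # raw WordNet synonym lookup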
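
The synonym substitution step can likewise be inspected token by token; the following is only a sketch under the same assumptions, with an arbitrary example sentence.

    # Hypothetical per-token check of replace_with_synonym (not part of the commit).
    doc = nlp("The children were running quickly")
    for token in doc:
        if token.pos_ in {"VERB", "NOUN", "ADJ", "ADV"}:
            print(token.text, "->", replace_with_synonym(token))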