Update app.py
app.py CHANGED
@@ -5,6 +5,8 @@ import spacy
 import subprocess
 import nltk
 from nltk.corpus import wordnet
+from gingerit.gingerit import GingerIt
+from pattern.en import conjugate, lemma, pluralize, singularize
 
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
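For reference, a minimal sketch of the pattern.en helpers pulled in by the new import line. This assumes the pattern package is installed in the Space; the values in the comments are typical outputs and are not taken from this commit.

from pattern.en import conjugate, lemma, pluralize, singularize

print(lemma("running"))                # typically "run"
print(conjugate("run", tense="past"))  # typically "ran"
print(pluralize("error"))              # typically "errors"
print(singularize("verbs"))            # typically "verb"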
@@ -51,53 +53,47 @@ def capitalize_sentences_and_nouns(text):
 
     return ' '.join(corrected_text)
 
-#
+# Function to correct tense errors using Pattern
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
+
     for token in doc:
         if token.pos_ == "VERB":
-
-
-
-            elif token.tag_ in {"VBD", "VBN"}:  # Past tense correction
-                corrected_text.append(lemma + "ed")
-            else:
-                corrected_text.append(token.text)
+            # Use conjugate from Pattern to adjust the tense of the verb
+            verb_form = conjugate(lemma(token.text), tense='past')  # Example: fix to past tense
+            corrected_text.append(verb_form)
         else:
             corrected_text.append(token.text)
+
     return ' '.join(corrected_text)
 
-#
+# Function to correct singular/plural errors using Pattern
 def correct_singular_plural_errors(text):
     doc = nlp(text)
     corrected_text = []
-
+
     for token in doc:
         if token.pos_ == "NOUN":
-            if token.tag_ == "NN"
-                corrected_text.append(token.text
-            elif token.tag_ == "NNS"
-                corrected_text.append(token.
-            else:
-                corrected_text.append(token.text)
+            if token.tag_ == "NN":  # Singular noun
+                corrected_text.append(singularize(token.text))
+            elif token.tag_ == "NNS":  # Plural noun
+                corrected_text.append(pluralize(token.text))
         else:
             corrected_text.append(token.text)
-
+
     return ' '.join(corrected_text)
 
-#
+# Function to check and correct article errors
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
-
-
-        if token.text.lower() in ['a', 'an']:
+    for token in doc:
+        if token.text in ['a', 'an']:
             next_token = token.nbor(1)
-
-        if token.text == "a" and next_word_lemma[0].lower() in vowels:
+            if token.text == "a" and next_token.text[0].lower() in "aeiou":
                 corrected_text.append("an")
-            elif token.text == "an" and
+            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
                 corrected_text.append("a")
             else:
                 corrected_text.append(token.text)
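The rewritten correction functions branch on spaCy's coarse part of speech (token.pos_) and fine-grained tag (token.tag_, e.g. NN vs. NNS). A quick self-contained check of those attributes, assuming the en_core_web_sm model that app.py presumably loads elsewhere; the tags in the comment are typical, not guaranteed.

import spacy

nlp = spacy.load("en_core_web_sm")
for token in nlp("The cats chases a apple"):
    # Prints e.g. "cats NOUN NNS", "chases VERB VBZ", "apple NOUN NN"
    print(token.text, token.pos_, token.tag_)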
@@ -105,12 +101,19 @@ def correct_article_errors(text):
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
-#
+# Function to correct overall grammar using GingerIt
+def correct_grammar(text):
+    parser = GingerIt()
+    result = parser.parse(text)
+    return result['result']
+
+# Paraphrasing function using SpaCy and NLTK (Humanifier)
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
     paraphrased_words = []
 
     for token in doc:
+        # Map SpaCy POS tags to WordNet POS tags
         pos = None
         if token.pos_ in {"NOUN"}:
             pos = wordnet.NOUN
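The new correct_grammar helper relies on GingerIt's parse(), which returns a dict whose 'result' key holds the corrected string (as the hunk above shows). A minimal usage sketch, assuming the gingerit package is installed; the sample sentence and output are illustrative only.

from gingerit.gingerit import GingerIt

parser = GingerIt()
corrected = parser.parse("He go to school yesterday")["result"]
print(corrected)  # e.g. "He went to school yesterday"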
@@ -123,12 +126,13 @@ def paraphrase_with_spacy_nltk(text):
 
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
 
-        # Replace with a synonym only if it makes sense
+        # Replace with a synonym only if it makes sense
         if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
 
+    # Join the words back into a sentence
     paraphrased_sentence = ' '.join(paraphrased_words)
 
     return paraphrased_sentence
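get_synonyms_nltk itself is outside this hunk, but it presumably wraps a WordNet lookup along these lines (a sketch only; it needs the NLTK wordnet corpus, and the printed synonym list is illustrative):

import nltk
from nltk.corpus import wordnet

nltk.download("wordnet", quiet=True)  # one-time corpus download
synsets = wordnet.synsets("quick", pos=wordnet.ADJ)
print([l.name() for l in synsets[0].lemmas()])  # e.g. ['quick', 'speedy']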
@@ -143,8 +147,11 @@ def paraphrase_and_correct(text):
     corrected_text = capitalize_sentences_and_nouns(corrected_text)
     corrected_text = correct_singular_plural_errors(corrected_text)
 
-    # Step 3:
-
+    # Step 3: Correct tense errors
+    corrected_text = correct_tense_errors(corrected_text)
+
+    # Step 4: Correct overall grammar using GingerIt
+    final_text = correct_grammar(corrected_text)
 
     return final_text
 
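Illustrative only: with steps 3 and 4 wired in, the whole chain can be exercised from a Python shell in the same directory as app.py (the sample sentence is made up, and the output depends on the spaCy model, NLTK data, and GingerIt):

from app import paraphrase_and_correct

print(paraphrase_and_correct("he go to school and he buy a apple"))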
@@ -167,5 +174,5 @@ with gr.Blocks() as demo:
     # Connect the paraphrasing function to the button
     paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 
-    # Launch the app
+# Launch the app
 demo.launch()