|
import os
import random
import re
import string
import subprocess
import sys

import gradio as gr
import nltk
import spacy
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker
from transformers import pipeline
|
|
|
|
|
# Request the NLTK data packages this script asks for, one nltk.download
# call per package, in the same order as they were originally listed.
for _resource in (
    'punkt',
    'stopwords',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'omw-1.4',
    'punkt_tab',
):
    nltk.download(_resource)
|
|
|
|
|
# English stop words from the NLTK corpus, stored as a set.
stop_words = {*stopwords.words("english")}

# Fine-grained POS tags (compared against spaCy's token.tag_) whose tokens
# are never replaced with a synonym.
exclude_tags = {
    'PRP', 'PRP$',
    'MD',
    'VBZ', 'VBP', 'VBD', 'VBG', 'VBN',
    'TO', 'IN', 'DT', 'CC',
}

# Lower-cased auxiliary and modal verbs that are always kept verbatim.
exclude_words = {
    'is', 'am', 'are', 'was', 'were',
    'have', 'has',
    'do', 'does', 'did',
    'will', 'shall', 'should', 'would', 'could',
    'can', 'may', 'might',
}

# Text-classification pipeline built at import time.
# NOTE(review): not referenced anywhere else in the visible part of this file.
pipeline_en = pipeline(
    task="text-classification",
    model="Hello-SimpleAI/chatgpt-detector-roberta",
)

# Shared spell checker instance.
# NOTE(review): not referenced anywhere else in the visible part of this file.
spell = SpellChecker()
|
|
|
|
|
# Load spaCy's small English model exactly once. When the model package is
# missing, spacy.load raises OSError; in that case download it and retry.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # sys.executable installs the model into the environment that is running
    # this script (a bare "python" may resolve to a different interpreter).
    # check=True makes a failed download raise here instead of surfacing
    # later as a confusing second OSError from spacy.load.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
|
|
|
def get_synonyms(word):
    """Collect WordNet lemma names that can stand in for *word*.

    Every synset returned by ``wordnet.synsets(word)`` is scanned. A lemma
    name is kept only if it is a single alphabetic word (no underscore) and
    differs from *word* when both are lower-cased.

    Returns:
        A set of lemma-name strings; empty when nothing qualifies.
    """
    lemma_names = (
        lemma.name()
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    )
    return {
        name
        for name in lemma_names
        if "_" not in name and name.isalpha() and name.lower() != word.lower()
    }
|
|
|
def replace_with_synonyms(word, pos_tag):
    """Pick a random synonym of *word* that NLTK tags as *pos_tag*.

    Each candidate from ``get_synonyms`` is tagged on its own with
    ``nltk.pos_tag``; only candidates whose tag equals *pos_tag* are
    eligible.

    Returns:
        One eligible synonym chosen with ``random.choice``, or *word*
        unchanged when no candidate has the requested tag.
    """
    same_tag = []
    for candidate in get_synonyms(word):
        tagged = nltk.pos_tag([candidate])
        if tagged[0][1] == pos_tag:
            same_tag.append(candidate)

    if not same_tag:
        return word
    return random.choice(same_tag)
|
|
|
def improve_paraphrasing_and_grammar(text):
    """Paraphrase *text* by synonym substitution, then tidy the result.

    The text is parsed with spaCy and processed sentence by sentence. A token
    is offered to ``replace_with_synonyms`` unless its tag is in
    ``exclude_tags``, its lower-cased text is in ``exclude_words``, or its
    text is found in ``string.punctuation``; all other tokens are kept
    verbatim. Tokens and sentences are joined with single spaces and the
    result is passed through the article, possessive, punctuation-spacing and
    capitalization repairs.

    Args:
        text: The input string to rewrite.

    Returns:
        The rewritten string.
    """
    doc = nlp(text)
    rewritten_sentences = []

    for sent in doc.sents:
        words = []
        for token in sent:
            replaceable = (
                token.tag_ not in exclude_tags
                and token.text.lower() not in exclude_words
                and token.text not in string.punctuation
            )
            if replaceable:
                # Fall back to the original token if no replacement comes back.
                words.append(replace_with_synonyms(token.text, token.tag_) or token.text)
            else:
                words.append(token.text)
        rewritten_sentences.append(' '.join(words))

    final_text = ' '.join(rewritten_sentences)

    # Article repair goes first: it re-parses the text with spaCy, and any
    # token-level re-joining it does must not undo the spacing repairs below.
    final_text = fix_article_errors(final_text)
    final_text = fix_possessives(final_text)
    final_text = fix_punctuation_spacing(final_text)
    final_text = capitalize_sentences(final_text)

    return final_text
|
|
|
def fix_punctuation_spacing(text):
    """Drop any whitespace that directly precedes ``,``, ``.``, ``!`` or ``?``."""
    space_before_mark = re.compile(r'\s+([,.!?])')
    tidied = space_before_mark.sub(r'\1', text)
    return tidied
|
|
|
def fix_possessives(text):
    """Re-attach a detached ``'s`` to the word before it.

    Turns ``John 's`` and ``John ' s`` into ``John's``. The ``s`` must end at
    a word boundary, so a closing quote followed by a word that merely starts
    with "s" (``the dogs' stuff``, ``'hello' said``) is left untouched.

    Args:
        text: The string to repair.

    Returns:
        The string with detached ``'s`` suffixes re-attached.
    """
    return re.sub(r"(\w)\s?'\s?s\b", r"\1's", text)
|
|
|
def capitalize_sentences(text):
    """Upper-case the first character of each sentence in *text*.

    Sentences are split on whitespace that follows a word character plus one
    of ``.``, ``!`` or ``?``. Each piece keeps its own terminator, so the
    pieces are re-joined with a single space (joining with ``'. '`` would
    double the punctuation). Only the first character is changed; the rest of
    the sentence keeps its casing so proper nouns and acronyms survive.

    Args:
        text: The string to capitalize.

    Returns:
        The string with each sentence starting in upper case.
    """
    sentences = re.split(r'(?<=\w[.!?])\s+', text)
    return ' '.join(s[:1].upper() + s[1:] for s in sentences)
|
|
|
def fix_article_errors(text):
    """Make lower-case ``a`` / ``an`` agree with the word that follows.

    The choice is spelling-based: the article becomes ``an`` when the next
    token starts with one of ``a e i o u`` (case-insensitive) and ``a``
    otherwise. An article that is the last token of the text has nothing to
    agree with and is left unchanged. Every token is emitted with its
    original trailing whitespace, so the spacing of *text* is preserved.

    Args:
        text: The string to repair.

    Returns:
        The string with ``a`` / ``an`` adjusted and spacing unchanged.
    """
    doc = nlp(text)
    last_index = len(doc) - 1
    pieces = []

    for token in doc:
        word = token.text
        # Guard against an article at the very end: there is no next token.
        if word in ('a', 'an') and token.i < last_index:
            next_starts_with_vowel = doc[token.i + 1].text[0].lower() in "aeiou"
            word = "an" if next_starts_with_vowel else "a"
        # whitespace_ is the token's own trailing whitespace ('' or ' ');
        # re-using it avoids inserting spaces before punctuation or "'s".
        pieces.append(word + token.whitespace_)

    return ''.join(pieces)
|
|
|
|
|
def gradio_interface(text):
    """Callback for the submit button: return the rewritten form of *text*."""
    rewritten = improve_paraphrasing_and_grammar(text)
    return rewritten
|
|
|
# Build the UI: a heading, an input box, a read-only output box and a button
# that feeds the input through gradio_interface into the output box.
with gr.Blocks() as demo:
    gr.Markdown("## Text Paraphrasing and Grammar Correction")

    text_input = gr.Textbox(
        label='Enter text for paraphrasing and grammar correction',
        lines=10,
    )
    text_output = gr.Textbox(
        label='Corrected Text',
        lines=10,
        interactive=False,
    )
    submit_button = gr.Button("π Paraphrase and Correct")

    # Event listeners are registered while the Blocks context is still open.
    submit_button.click(
        fn=gradio_interface,
        inputs=text_input,
        outputs=text_output,
    )

# Start the app as soon as the module runs; share=True is passed to launch.
demo.launch(share=True)
|
|