LTP / app.py
sashdev's picture
Update app.py
e63bdfe verified
raw
history blame
5.12 kB
import os
import random
import re
import string
import subprocess
import sys

import gradio as gr
import nltk
import spacy
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker
from transformers import pipeline
# Download the NLTK resources this module relies on (tokenizers, stop words,
# POS taggers, WordNet and its multilingual extension).
for _nltk_resource in (
    'punkt',
    'stopwords',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'omw-1.4',
    'punkt_tab',
):
    nltk.download(_nltk_resource)

# Initialize stopwords
stop_words = set(stopwords.words("english"))

# Tokens that must never be replaced by a synonym. The tags are Penn Treebank
# POS tags: pronouns, modals, inflected verb forms, "to", prepositions,
# determiners and coordinating conjunctions.
exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
# Auxiliary and modal verbs that are skipped regardless of the tag they receive.
exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}

# Initialize the English text classification pipeline for AI detection
# NOTE(review): `pipeline_en` is not referenced by any function in this file;
# it is kept because it is a module-level name other code may rely on.
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Initialize the spell checker
spell = SpellChecker()

# Load the spaCy model once, downloading it first if it is not installed.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Run the download with the interpreter executing this file so the model is
    # installed into the same environment, and fail loudly if the download
    # itself fails instead of surfacing a second, less specific load error.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
def get_synonyms(word):
    """Collect WordNet lemma names that can stand in for ``word``.

    Every synset WordNet returns for ``word`` is scanned. A lemma name is kept
    only if it contains no underscore, is purely alphabetic, and differs from
    ``word`` when compared case-insensitively.

    Returns:
        A set of candidate synonym strings (empty when nothing qualifies).
    """
    return {
        name
        for synset in wordnet.synsets(word)
        for name in (lemma.name() for lemma in synset.lemmas())
        if "_" not in name and name.isalpha() and name.lower() != word.lower()
    }
def replace_with_synonyms(word, pos_tag):
    """Pick a random synonym of ``word`` that NLTK tags as ``pos_tag``.

    Each candidate from ``get_synonyms`` is tagged on its own with
    ``nltk.pos_tag`` and kept only when its tag equals ``pos_tag``.

    Returns:
        A randomly chosen matching synonym, or ``word`` unchanged when no
        candidate carries the requested tag.
    """
    same_pos = []
    for candidate in get_synonyms(word):
        tagged = nltk.pos_tag([candidate])
        if tagged[0][1] == pos_tag:
            same_pos.append(candidate)

    if not same_pos:
        return word
    return random.choice(same_pos)
def improve_paraphrasing_and_grammar(text):
    """Rewrite ``text`` with synonym substitutions, then tidy the result.

    The text is parsed with spaCy and processed sentence by sentence. Tokens
    whose tag is in ``exclude_tags``, whose lower-cased text is in
    ``exclude_words``, or whose text occurs in ``string.punctuation`` are kept
    verbatim; every other token is passed to ``replace_with_synonyms``.
    The rebuilt text then goes through the possessive, punctuation-spacing,
    capitalization and article clean-up helpers, in that order.

    Returns:
        The rewritten text as a single string.
    """

    def _rewrite(token):
        # Return the token text, swapped for a synonym when it is eligible.
        original = token.text
        protected = (
            token.tag_ in exclude_tags
            or original.lower() in exclude_words
            or original in string.punctuation
        )
        if protected:
            return original
        return replace_with_synonyms(original, token.tag_) or original

    sentences = [
        ' '.join(_rewrite(token) for token in sent)
        for sent in nlp(text).sents
    ]

    # Ensure proper punctuation and capitalization
    polished = ' '.join(sentences)
    for cleanup in (
        fix_possessives,
        fix_punctuation_spacing,
        capitalize_sentences,
        fix_article_errors,
    ):
        polished = cleanup(polished)
    return polished
def fix_punctuation_spacing(text):
    """Strip whitespace that sits directly before ``,``, ``.``, ``!`` or ``?``.

    Returns:
        ``text`` with each such whitespace run removed; other spacing is
        left as it was.
    """
    space_before_mark = re.compile(r'\s+([,.!?])')
    return space_before_mark.sub(r'\1', text)
def fix_possessives(text):
    """Re-attach a detached possessive marker, e.g. "John ' s" -> "John's".

    Tokenizing and re-joining with spaces can leave the apostrophe and the
    ``s`` separated from the noun. At most one whitespace character on either
    side of the apostrophe is collapsed.

    The ``s`` must be a complete word (``\\b`` after it). Without that
    boundary, a closing quote or plural possessive followed by a word starting
    with "s" would be glued together ("dogs' stuff" -> "dogs'stuff").

    Returns:
        ``text`` with detached possessives joined back onto their noun.
    """
    return re.sub(r"(\w)\s?'\s?s\b", r"\1's", text)
def capitalize_sentences(text):
    """Upper-case the first character of each sentence in ``text``.

    Sentences are split on whitespace that follows a word character plus one
    of ``.``, ``!`` or ``?``. The terminator stays attached to its sentence, so
    the pieces are re-joined with a single space; joining with ``'. '`` would
    double the punctuation ("Hello.. World.").

    Only the first character is changed. ``str.capitalize`` is deliberately
    avoided because it lower-cases the rest of the sentence and would destroy
    proper nouns and acronyms.

    Returns:
        The text with each sentence starting in upper case and sentences
        separated by one space.
    """
    sentences = re.split(r'(?<=\w[.!?])\s+', text)
    return ' '.join(s[:1].upper() + s[1:] for s in sentences)
def fix_article_errors(text):
    """Correct lower-case 'a' / 'an' based on the next token's first letter.

    This is a spelling-based approximation of the pronunciation rule: the
    article becomes "an" before a token starting with a, e, i, o or u and "a"
    before any other letter. Articles are left untouched when they are the
    last token (there is nothing to agree with) or when the next token does
    not start with a letter (e.g. "an 8-year-old").

    The text is rebuilt from each token plus its own trailing whitespace, so
    spacing around punctuation and contractions is preserved exactly rather
    than being re-inserted between every token.

    Returns:
        ``text`` with the affected articles replaced.
    """
    doc = nlp(text)
    last_index = len(doc) - 1
    pieces = []
    for token in doc:
        word = token.text
        if word in ('a', 'an') and token.i < last_index:
            first_char = doc[token.i + 1].text[0].lower()
            if first_char.isalpha():
                word = 'an' if first_char in 'aeiou' else 'a'
        # whitespace_ is the token's trailing whitespace ('' or ' ').
        pieces.append(word + token.whitespace_)
    return ''.join(pieces)
# Gradio app setup
def gradio_interface(text):
    """Click handler for the Gradio UI.

    Passes the textbox content to ``improve_paraphrasing_and_grammar`` and
    returns its result for display in the output textbox.
    """
    processed = improve_paraphrasing_and_grammar(text)
    return processed
# Build the UI. The components and the click handler are created inside the
# Blocks context so that they are attached to `demo`.
with gr.Blocks() as demo:
    gr.Markdown("## Text Paraphrasing and Grammar Correction")
    text_input = gr.Textbox(lines=10, label='Enter text for paraphrasing and grammar correction')
    text_output = gr.Textbox(lines=10, label='Corrected Text', interactive=False)
    # The label previously contained the mis-decoded bytes of this emoji.
    submit_button = gr.Button("🔄 Paraphrase and Correct")
    submit_button.click(fn=gradio_interface, inputs=text_input, outputs=text_output)

# Launch the Gradio app
demo.launch(share=True)