Spaces:

sashdev
/

LTP

Runtime error

App Files Files Community

LTP / app.py

sashdev

Update app.py

e63bdfe verified 10 months ago

raw

history blame

5.12 kB

	import os
	import gradio as gr
	from transformers import pipeline
	import spacy
	import subprocess
	import nltk
	from nltk.corpus import wordnet
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	from spellchecker import SpellChecker
	import re
	import string
	import random

	# Download necessary NLTK data
	nltk.download('punkt')
	nltk.download('stopwords')
	nltk.download('averaged_perceptron_tagger')
	nltk.download('averaged_perceptron_tagger_eng')
	nltk.download('wordnet')
	nltk.download('omw-1.4')
	nltk.download('punkt_tab')

	# Initialize stopwords
	stop_words = set(stopwords.words("english"))

	# Part-of-speech tags whose tokens are never replaced with a synonym:
	# pronouns, modals, inflected verb forms, "to", prepositions, determiners
	# and coordinating conjunctions.
	exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
	# Auxiliary and modal verbs that are kept verbatim regardless of their tag.
	exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}

	# Initialize the English text classification pipeline for AI detection
	# NOTE(review): not referenced by any function visible in this file.
	pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

	# Initialize the spell checker
	spell = SpellChecker()

	# Load the SpaCy model once, downloading it first if it is not installed.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    import sys

	    # Use the running interpreter so the model lands in the same environment,
	    # and fail loudly (check=True) instead of retrying the load after a
	    # download that did not succeed.
	    subprocess.run(
	        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
	        check=True,
	    )
	    nlp = spacy.load("en_core_web_sm")

	def get_synonyms(word):
	    """Return the set of single-word WordNet lemma names for ``word``.

	    Lemma names containing an underscore or any non-alphabetic character are
	    skipped, as is any name equal to ``word`` ignoring case.
	    """
	    return {
	        name
	        for synset in wordnet.synsets(word)
	        for name in (lemma.name() for lemma in synset.lemmas())
	        if "_" not in name and name.isalpha() and name.lower() != word.lower()
	    }

	def replace_with_synonyms(word, pos_tag):
	    """Pick a random synonym of ``word`` that NLTK tags as ``pos_tag``.

	    Each candidate from ``get_synonyms`` is tagged on its own with
	    ``nltk.pos_tag``; only candidates whose tag equals ``pos_tag`` are
	    eligible. ``word`` is returned unchanged when no candidate qualifies.
	    """
	    matching = []
	    for candidate in get_synonyms(word):
	        tagged = nltk.pos_tag([candidate])
	        if tagged[0][1] == pos_tag:
	            matching.append(candidate)

	    if not matching:
	        return word
	    return random.choice(matching)

	def improve_paraphrasing_and_grammar(text):
	    """Paraphrase ``text`` by synonym substitution, then tidy the result.

	    A token is passed to ``replace_with_synonyms`` when its tag is not in
	    ``exclude_tags``, its lowercase text is not in ``exclude_words`` and it is
	    neither punctuation nor whitespace. Every other token is kept verbatim.
	    The rebuilt text then goes through the possessive, punctuation-spacing,
	    capitalization and article fix-ups, in that order.

	    Args:
	        text: The input string to paraphrase.

	    Returns:
	        The paraphrased and cleaned-up string.
	    """
	    doc = nlp(text)
	    pieces = []

	    for token in doc:
	        original = token.text
	        replaceable = (
	            token.tag_ not in exclude_tags
	            and original.lower() not in exclude_words
	            and original not in string.punctuation
	            and not token.is_space
	        )
	        # Fall back to the original text if the lookup yields nothing usable.
	        word = (replace_with_synonyms(original, token.tag_) or original) if replaceable else original
	        # Re-attach the token's own trailing whitespace instead of joining
	        # with single spaces, so contractions ("don't"), hyphenated words,
	        # quotes and brackets keep their original spacing.
	        pieces.append(word + token.whitespace_)

	    # Ensure proper punctuation and capitalization
	    final_text = ''.join(pieces)
	    final_text = fix_possessives(final_text)
	    final_text = fix_punctuation_spacing(final_text)
	    final_text = capitalize_sentences(final_text)
	    final_text = fix_article_errors(final_text)

	    return final_text

	def fix_punctuation_spacing(text):
	    """Remove whitespace that precedes a punctuation mark.

	    Handles ``, . ! ? ; :`` so that text re-joined token by token
	    ("Hello , world ; bye !") reads "Hello, world; bye!".

	    Args:
	        text: The string to clean up.

	    Returns:
	        ``text`` with the whitespace before those marks removed.
	    """
	    return re.sub(r'\s+([,.!?;:])', r'\1', text)

	def fix_possessives(text):
	    """Rejoin possessives that were split apart, e.g. "John ' s" -> "John's".

	    The trailing ``\\b`` requires the "s" to be a complete token, so an
	    apostrophe followed by a word that merely starts with "s"
	    ("dogs ' sticks", "said ' stop '") is left alone.

	    Args:
	        text: The string to clean up.

	    Returns:
	        ``text`` with split possessive markers collapsed to ``'s``.
	    """
	    return re.sub(r"(\w)\s?'\s?s\b", r"\1's", text)

	def capitalize_sentences(text):
	    """Upper-case the first character of each sentence.

	    A sentence start is the beginning of the text, or the first
	    non-whitespace character after a word character followed by ``.``,
	    ``!`` or ``?`` and whitespace. Only that one character is changed: the
	    rest of the sentence keeps its casing (proper nouns, acronyms), and the
	    existing terminators and whitespace are preserved, not re-inserted.

	    Args:
	        text: The string to capitalize.

	    Returns:
	        ``text`` with each sentence's first character upper-cased.
	    """
	    return re.sub(
	        r'(^\s*|(?<=\w[.!?])\s+)(\S)',
	        lambda match: match.group(1) + match.group(2).upper(),
	        text,
	    )

	def fix_article_errors(text):
	    """Correct "a"/"an" based on the first letter of the following token.

	    The choice is a spelling heuristic: "an" before a token starting with
	    one of ``aeiou``, "a" before any other letter. Lowercase articles are
	    always checked; capitalized ones only at a sentence start, so a
	    mid-sentence "A" (e.g. a grade or label) is not treated as an article.

	    Args:
	        text: The string to correct.

	    Returns:
	        ``text`` with mismatched articles swapped and all original
	        whitespace preserved.
	    """
	    doc = nlp(text)
	    token_count = len(doc)
	    pieces = []
	    for token in doc:
	        word = token.text
	        lowered = word.lower()
	        is_article = lowered in ('a', 'an') and (word.islower() or bool(token.is_sent_start))
	        # Guard the look-ahead: an article as the very last token has no
	        # neighbour, and Token.nbor(1) would raise IndexError there.
	        if is_article and token.i + 1 < token_count:
	            first_char = doc[token.i + 1].text[0].lower()
	            # Digits, quotes and other non-letters say nothing reliable about
	            # the following sound, so the article is left as written.
	            if first_char.isalpha():
	                wanted = 'an' if first_char in 'aeiou' else 'a'
	                if wanted != lowered:
	                    word = wanted.capitalize() if word[0].isupper() else wanted
	        # Keep each token's own trailing whitespace; joining with ' ' would
	        # put spaces back in front of punctuation.
	        pieces.append(word + token.whitespace_)
	    return ''.join(pieces)

	# Gradio app setup
	def gradio_interface(text):
	    """Run the paraphrasing and grammar pipeline on ``text`` and return the result."""
	    processed = improve_paraphrasing_and_grammar(text)
	    return processed

	# Build the UI: an input box, a read-only output box and a submit button
	# wired to gradio_interface.
	with gr.Blocks() as demo:
	    gr.Markdown("## Text Paraphrasing and Grammar Correction")
	    text_input = gr.Textbox(lines=10, label='Enter text for paraphrasing and grammar correction')
	    text_output = gr.Textbox(lines=10, label='Corrected Text', interactive=False)
	    submit_button = gr.Button("🔄 Paraphrase and Correct")

	    # Event listeners must be registered inside the Blocks context.
	    submit_button.click(fn=gradio_interface, inputs=text_input, outputs=text_output)

	# Launch the Gradio app only when run as a script, so importing this module
	# (for tests or tooling) does not start a server.
	# NOTE(review): share=True may be unnecessary on a hosted Space - confirm.
	if __name__ == "__main__":
	    demo.launch(share=True)