huamnifierWithSimpleGrammer

Running

App Files Files

huamnifierWithSimpleGrammer / app.py

sashtech

Update app.py

f79e1dd verified 10 months ago

raw

history blame

5.2 kB

	import os
	import subprocess
	import sys

	import gradio as gr
	import nltk
	import spacy
	from nltk.corpus import wordnet
	from transformers import pipeline

	# Fetch the WordNet corpora used by the synonym lookup and verb lemmatisation below.
	nltk.download('wordnet')
	nltk.download('omw-1.4')

	# Load the small English spaCy model, installing it on first run if it is missing.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    # Run the download with the interpreter that is executing this file (not
	    # whatever "python" resolves to on PATH) so the model is installed into
	    # this environment, and raise CalledProcessError if the download fails
	    # instead of silently continuing.
	    subprocess.run(
	        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
	        check=True,
	    )
	    nlp = spacy.load("en_core_web_sm")

	# English text-classification pipeline used for AI detection.
	pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

	def predict_en(text):
	    """Run the English AI-detection classifier on ``text``.

	    Returns:
	        A ``(label, score)`` tuple read from the first result that
	        ``pipeline_en`` yields for ``text``.
	    """
	    top_result = pipeline_en(text)[0]
	    label = top_result['label']
	    score = top_result['score']
	    return label, score

	def get_synonyms_nltk(word, pos):
	    """Return the lemma names of the first WordNet synset found for ``word``.

	    Args:
	        word: The word to look up.
	        pos: Part of speech. Accepts a WordNet tag (``'n'``, ``'v'``, ``'a'``,
	            ``'s'``, ``'r'``) or a long-form name (``"noun"``, ``"verb"``,
	            ``"adj"``, ``"adv"``) in any letter case. ``None`` searches every
	            part of speech.

	    Returns:
	        A list of lemma names from the first matching synset, or an empty
	        list when there is no match or ``pos`` is not a recognised tag.
	    """
	    # WordNet lookups take single-letter tags, so long-form names (for
	    # example a lower-cased spaCy ``pos_`` value) are translated first.
	    long_form_to_wordnet = {
	        "noun": wordnet.NOUN,
	        "verb": wordnet.VERB,
	        "adj": wordnet.ADJ,
	        "adv": wordnet.ADV,
	    }
	    if pos is not None:
	        key = str(pos).lower()
	        pos = long_form_to_wordnet.get(key, key)
	        valid_tags = {wordnet.NOUN, wordnet.VERB, wordnet.ADJ, wordnet.ADJ_SAT, wordnet.ADV}
	        if pos not in valid_tags:
	            # Unknown tag: report "no synonyms" rather than handing WordNet
	            # a value it does not understand.
	            return []

	    synsets = wordnet.synsets(word, pos=pos)
	    if not synsets:
	        return []
	    return [lemma.name() for lemma in synsets[0].lemmas()]

	def rephrase_text(text):
	    """Rephrase ``text`` by swapping nouns, verbs and adjectives for a synonym.

	    Every token tagged NOUN, VERB or ADJ is replaced by the first synonym that
	    ``get_synonyms_nltk`` returns for it; tokens without synonyms, and all
	    other tokens, are kept as they are.

	    Args:
	        text: The input string.

	    Returns:
	        The rephrased string, with the original spacing between tokens kept.
	    """
	    # spaCy coarse-grained tag -> WordNet part-of-speech constant. WordNet
	    # expects its own single-letter tags, not spaCy's tag names.
	    wordnet_pos_by_spacy_tag = {
	        "NOUN": wordnet.NOUN,
	        "VERB": wordnet.VERB,
	        "ADJ": wordnet.ADJ,
	    }

	    pieces = []
	    for token in nlp(text):
	        replacement = token.text
	        wn_pos = wordnet_pos_by_spacy_tag.get(token.pos_)
	        if wn_pos is not None:
	            synonyms = get_synonyms_nltk(token.text, pos=wn_pos)
	            if synonyms:
	                # Replace with the first synonym found. Multi-word lemma
	                # names use underscores in WordNet; show them as spaces.
	                replacement = synonyms[0].replace('_', ' ')
	        # Re-attach the token's own trailing whitespace instead of joining
	        # with ' ', which would put a space in front of punctuation.
	        pieces.append(replacement + token.whitespace_)

	    return ''.join(pieces)

	def capitalize_sentences_and_nouns(text):
	    """Upper-case the first letter of each sentence and of each proper noun.

	    Only the first character of a word is changed; the remaining characters
	    keep their original case, so words such as "NASA" or "iPhone" are not
	    flattened the way ``str.capitalize()`` would flatten them.

	    Args:
	        text: The input string.

	    Returns:
	        The string with the capitalisation applied and the original spacing
	        between tokens kept.
	    """
	    doc = nlp(text)
	    pieces = []

	    for sent in doc.sents:
	        for token in sent:
	            word = token.text
	            # First word of the sentence, or a proper noun.
	            if token.i == sent.start or token.pos_ == "PROPN":
	                word = word[:1].upper() + word[1:]
	            # Keep the token's own trailing whitespace so punctuation stays
	            # attached to the preceding word.
	            pieces.append(word + token.whitespace_)

	    return ''.join(pieces)

	def correct_tense_errors(text):
	    """Replace auxiliary verbs in ``text`` with their WordNet base form.

	    A token is rewritten when it is tagged VERB and its dependency label is
	    ``aux`` or ``auxpass``; the replacement is ``wordnet.morphy`` of the token
	    text as a verb, or the unchanged text when WordNet has no base form.

	    Args:
	        text: The input string.

	    Returns:
	        The rewritten string, with the original spacing between tokens kept.
	    """
	    # NOTE(review): recent spaCy English models appear to tag auxiliaries as
	    # AUX rather than VERB, in which case this condition rarely matches --
	    # confirm the intended selection before widening it.
	    pieces = []
	    for token in nlp(text):
	        word = token.text
	        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
	            word = wordnet.morphy(token.text, wordnet.VERB) or token.text
	        # Keep the token's own trailing whitespace instead of joining with
	        # ' ', which would put a space in front of punctuation.
	        pieces.append(word + token.whitespace_)
	    return ''.join(pieces)

	def _pluralize_regular(noun):
	    """Return the regular English plural of ``noun`` (irregular nouns are not handled)."""
	    lowered = noun.lower()
	    if lowered.endswith(('s', 'x', 'z', 'ch', 'sh')):
	        return noun + 'es'
	    if len(lowered) > 1 and lowered.endswith('y') and lowered[-2] not in 'aeiou':
	        return noun[:-1] + 'ies'
	    return noun + 's'


	def correct_singular_plural_errors(text):
	    """Fix number disagreement between a noun and its quantifier.

	    * A singular noun (tag ``NN``) modified by "many", "several" or "few" is
	      replaced by a regular plural of its lemma.
	    * A plural noun (tag ``NNS``) modified by "a" or "one", and not also by
	      one of the plural quantifiers above (as in "a few dogs"), is replaced
	      by its lemma.

	    The modifiers inspected are the noun's own dependency children.

	    Args:
	        text: The input string.

	    Returns:
	        The corrected string, with the original spacing between tokens kept.
	    """
	    plural_quantifiers = {'many', 'several', 'few'}
	    singular_quantifiers = {'a', 'one'}

	    pieces = []
	    for token in nlp(text):
	        word = token.text
	        if token.pos_ == "NOUN":
	            # Look at the words attached to this noun. ``token.head.children``
	            # would be the noun's siblings, not its modifiers.
	            modifiers = {child.text.lower() for child in token.children}
	            if token.tag_ == "NN":  # Singular noun
	                if modifiers & plural_quantifiers:
	                    word = _pluralize_regular(token.lemma_)
	            elif token.tag_ == "NNS":  # Plural noun
	                if modifiers & singular_quantifiers and not modifiers & plural_quantifiers:
	                    word = token.lemma_
	        # Keep the token's own trailing whitespace instead of joining with
	        # ' ', which would put a space in front of punctuation.
	        pieces.append(word + token.whitespace_)
	    return ''.join(pieces)

	def correct_article_errors(text):
	    """Swap "a" and "an" so the article agrees with the following word.

	    The choice is made from the first letter of the next token: a vowel
	    letter (a, e, i, o, u) selects "an", anything else selects "a". Lower-case
	    "a"/"an" are always checked; capitalised forms (for example at the start
	    of a sentence) are checked only when spaCy tags them as a determiner, so
	    a stand-alone letter "A" is left alone. An article that is the last token
	    of the text has no following word and is kept unchanged.

	    Args:
	        text: The input string.

	    Returns:
	        The corrected string, with the original spacing between tokens kept.
	    """
	    doc = nlp(text)
	    last_index = len(doc) - 1
	    pieces = []
	    for token in doc:
	        word = token.text
	        lowered = word.lower()
	        is_article = word in ('a', 'an') or (
	            lowered in ('a', 'an') and token.pos_ == "DET"
	        )
	        # ``token.i < last_index`` guards the look-ahead: asking for the
	        # neighbour of the final token would raise IndexError.
	        if is_article and token.i < last_index:
	            next_word = doc[token.i + 1].text
	            wanted = "an" if next_word[0].lower() in "aeiou" else "a"
	            if wanted != lowered:
	                # Preserve a leading capital ("A apple" -> "An apple").
	                word = wanted.capitalize() if word[0].isupper() else wanted
	        # Keep the token's own trailing whitespace instead of joining with
	        # ' ', which would put a space in front of punctuation.
	        pieces.append(word + token.whitespace_)
	    return ''.join(pieces)

	def paraphrase_and_correct(text):
	    """Rephrase ``text`` and then run the grammar-correction passes over it.

	    The passes run in a fixed order, each one receiving the previous pass's
	    output: synonym rephrasing, capitalisation, article correction, tense
	    correction, and singular/plural correction.
	    """
	    stages = (
	        rephrase_text,
	        # Capitalise before the remaining passes to ensure proper noun capitalization.
	        capitalize_sentences_and_nouns,
	        correct_article_errors,
	        correct_tense_errors,
	        correct_singular_plural_errors,
	    )
	    result = text
	    for stage in stages:
	        result = stage(result)
	    return result

	# Gradio interface: one input box and a button on the first row, the processed
	# text plus the AI-detection label and score on the second row.
	# NOTE(review): the nesting below was reconstructed from a copy of this file
	# whose indentation had been lost -- confirm the row layout.
	def _process_input(text):
	    """Return ``(processed_text, label, score)`` for the text entered in the UI."""
	    processed = paraphrase_and_correct(text)
	    return (processed, *predict_en(text))


	with gr.Blocks() as demo:
	    with gr.Row():
	        t1 = gr.Textbox(label="Input Text", lines=5)
	        button1 = gr.Button("Process")
	    with gr.Row():
	        output_text = gr.Textbox(label="Processed Text", lines=5)
	        label1 = gr.Label(label="AI Detection Label")
	        score1 = gr.Label(label="AI Detection Score")

	    # Clicking the button feeds the input box to the handler and spreads its
	    # three return values over the three output components.
	    button1.click(
	        fn=_process_input,
	        inputs=[t1],
	        outputs=[output_text, label1, score1],
	    )

	demo.launch()