huamnifierWithSimpleGrammer

Running

App Files Files

huamnifierWithSimpleGrammer / app.py

sashtech

Update app.py

aec8023 verified 10 months ago

raw

history blame

4.31 kB

	import os
	import gradio as gr
	from transformers import pipeline
	import spacy
	import subprocess
	import nltk
	from nltk.corpus import wordnet
	from gensim import downloader as api

	# Fetch the NLTK corpora that the WordNet lookups below depend on
	for _corpus in ('wordnet', 'omw-1.4'):
	    nltk.download(_corpus)

	# Ensure the SpaCy model is installed; download it on first run if missing
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    import sys  # local import: only needed on the download path

	    # Invoke the interpreter that is running this script rather than
	    # whatever "python" resolves to on PATH, so the model is installed
	    # into the environment that will load it.
	    # check=True reports a failed download right here instead of letting
	    # it resurface as a second OSError from the retry below.
	    subprocess.run(
	        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
	        check=True,
	    )
	    nlp = spacy.load("en_core_web_sm")

	# Load pre-trained word vectors through Gensim's downloader
	# (the "glove-wiki-gigaword-50" dataset).
	# NOTE(review): word_vectors is not referenced by any code visible in this
	# file -- confirm whether this load is still needed.
	word_vectors = api.load("glove-wiki-gigaword-50")

	# Text-classification pipeline backed by the Hello-SimpleAI ChatGPT detector
	pipeline_en = pipeline(
	    task="text-classification",
	    model="Hello-SimpleAI/chatgpt-detector-roberta",
	)

	# AI detection function using the Hello-SimpleAI/chatgpt-detector-roberta model
	def detect_ai_generated(text):
	    """Classify *text* as AI- or human-written using ``pipeline_en``.

	    Args:
	        text: The passage to classify.

	    Returns:
	        A ``(message, score)`` tuple. ``message`` has the form
	        "The content is NN.NN% <AI|Human> Written"; ``score`` is the score
	        the pipeline reported for its predicted label, scaled to a
	        percentage (float).
	    """
	    # truncation=True is forwarded to the tokenizer so that inputs longer
	    # than the model's maximum sequence length are cut instead of failing.
	    res = pipeline_en(text, truncation=True)[0]
	    label = res['label']
	    score = res['score'] * 100  # Convert probability to percentage

	    # Accept both the generic "LABEL_1" name and the named class.
	    # NOTE(review): the published model config is believed to map class 1
	    # to "ChatGPT" (and class 0 to "Human"); with only "LABEL_1" checked,
	    # AI-written text would always be reported as "Human" -- confirm
	    # against the model's id2label.
	    ai_labels = ("LABEL_1", "CHATGPT")
	    human_readable_label = "AI" if str(label).upper() in ai_labels else "Human"

	    # Return formatted string with label and percentage score
	    return f"The content is {score:.2f}% {human_readable_label} Written", score

	# Look up synonym candidates for a word in NLTK WordNet
	def get_synonyms_nltk(word, pos):
	    """Return the lemma names of the first WordNet synset for *word*.

	    Args:
	        word: The word to look up.
	        pos: WordNet part-of-speech constant passed through to
	            ``wordnet.synsets``.

	    Returns:
	        A list of lemma names from the first synset, or an empty list when
	        WordNet returns no synsets.
	    """
	    matches = wordnet.synsets(word, pos=pos)
	    if not matches:
	        return []

	    primary = matches[0]
	    return [entry.name() for entry in primary.lemmas()]

	# Function to capitalize the first letter of sentences and proper nouns
	def capitalize_sentences_and_nouns(text: str) -> str:
	    """Upper-case the first letter of every sentence and proper noun.

	    The text is parsed with the module-level ``nlp`` pipeline. The first
	    token of each sentence and every token tagged ``PROPN`` get their first
	    character upper-cased; all other characters and the original spacing
	    between tokens are left as they were.

	    Args:
	        text: The text to correct.

	    Returns:
	        The text with corrected capitalization.
	    """
	    doc = nlp(text)
	    pieces = []

	    for sent in doc.sents:
	        for token in sent:
	            word = token.text
	            if token.i == sent.start or token.pos_ == "PROPN":
	                # Only touch the first character: str.capitalize() would
	                # also lower-case the rest ("NASA" -> "Nasa").
	                word = word[:1].upper() + word[1:]
	            # whitespace_ is the spacing that followed this token in the
	            # input; reusing it avoids the "word ," / "end ." artefacts a
	            # blanket ' '.join over tokens produces.
	            pieces.append(word + token.whitespace_)

	    return ''.join(pieces)

	# Paraphrasing function using SpaCy and NLTK
	def paraphrase_with_spacy_nltk(text: str) -> str:
	    """Paraphrase *text* by swapping content words for a WordNet synonym.

	    Each noun, verb, adjective and adverb (per SpaCy's coarse POS tag) is
	    looked up with ``get_synonyms_nltk``; when the first synonym differs
	    from the lower-cased token it replaces the token. All other tokens and
	    the original spacing are kept. The result is then passed through
	    ``capitalize_sentences_and_nouns``.

	    Args:
	        text: The text to paraphrase.

	    Returns:
	        The paraphrased text with sentence and proper-noun capitalization
	        applied.
	    """
	    # SpaCy coarse POS tag -> WordNet POS constant; any other tag is skipped
	    pos_map = {
	        "NOUN": wordnet.NOUN,
	        "VERB": wordnet.VERB,
	        "ADJ": wordnet.ADJ,
	        "ADV": wordnet.ADV,
	    }

	    doc = nlp(text)
	    pieces = []

	    for token in doc:
	        lowered = token.text.lower()
	        pos = pos_map.get(token.pos_)
	        synonyms = get_synonyms_nltk(lowered, pos) if pos else []

	        # Replace only when the leading synonym is actually a different word
	        if synonyms and synonyms[0] != lowered:
	            # WordNet lemma names use '_' between the words of multi-word
	            # entries; turn those back into spaces for display.
	            word = synonyms[0].replace('_', ' ')
	        else:
	            word = token.text

	        # Re-attach the token's own trailing whitespace so punctuation stays
	        # attached to the preceding word instead of being space-separated.
	        pieces.append(word + token.whitespace_)

	    # Capitalize sentences and proper nouns
	    return capitalize_sentences_and_nouns(''.join(pieces))

	# Combined pipeline: paraphrase first, then fix capitalization
	def paraphrase_and_correct(text):
	    """Paraphrase *text* and run the capitalization pass over the result.

	    Args:
	        text: The text to rewrite.

	    Returns:
	        The output of ``capitalize_sentences_and_nouns`` applied to the
	        output of ``paraphrase_with_spacy_nltk``.
	    """
	    return capitalize_sentences_and_nouns(paraphrase_with_spacy_nltk(text))

	# Gradio UI: input and action buttons on the left, result boxes on the right
	with gr.Blocks() as interface:
	    with gr.Row():
	        with gr.Column():
	            text_input = gr.Textbox(lines=5, label="Input Text")
	            detect_button = gr.Button(value="AI Detection")
	            paraphrase_button = gr.Button(value="Paraphrase & Correct")
	        with gr.Column():
	            output_label = gr.Textbox(label="Predicted Label 🎃")
	            output_prob = gr.Textbox(label="Probability (%)")

	    # Detection fills both result boxes; paraphrasing writes its text into
	    # the first one only.
	    detect_button.click(
	        fn=detect_ai_generated,
	        inputs=text_input,
	        outputs=[output_label, output_prob],
	    )
	    paraphrase_button.click(
	        fn=paraphrase_and_correct,
	        inputs=text_input,
	        outputs=output_label,
	    )

	# Start the Gradio app
	interface.launch(debug=False)