import os
import subprocess
import gradio as gr
from transformers import pipeline
import spacy
import nltk
from nltk.corpus import wordnet
# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')
# Ensure the spaCy model is installed, downloading it on first run if needed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
# Initialize the English text classification pipeline for AI detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
def predict_en(text):
    """Function to predict the label and score for English text (AI Detection)"""
    res = pipeline_en(text)[0]
    return res['label'], res['score']
def get_synonyms_nltk(word, pos):
    """Function to get synonyms using NLTK WordNet"""
    synsets = wordnet.synsets(word, pos=pos)
    if synsets:
        lemmas = synsets[0].lemmas()
        return [lemma.name() for lemma in lemmas]
    return []
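# Example (exact lemmas depend on the installed WordNet data):
#   get_synonyms_nltk("dog", wordnet.NOUN)
#   # -> roughly ['dog', 'domestic_dog', 'Canis_familiaris']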
def rephrase_text(text):
    """Function to rephrase text by replacing words with synonyms"""
    # Map spaCy coarse POS tags to WordNet POS constants; wordnet.synsets
    # expects 'n'/'v'/'a', not spaCy's 'NOUN'/'VERB'/'ADJ'.
    pos_map = {"NOUN": wordnet.NOUN, "VERB": wordnet.VERB, "ADJ": wordnet.ADJ}
    doc = nlp(text)
    rephrased_text = []
    for token in doc:
        if token.pos_ in pos_map:
            synonyms = get_synonyms_nltk(token.text, pos=pos_map[token.pos_])
            if synonyms:
                # Replace with the first synonym found; WordNet joins
                # multiword lemmas with underscores, so restore spaces.
                rephrased_text.append(synonyms[0].replace('_', ' '))
            else:
                rephrased_text.append(token.text)
        else:
            rephrased_text.append(token.text)
    return ' '.join(rephrased_text)
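# Note: output depends on WordNet's synset ordering. The first lemma of a
# word's first synset is often the word itself, so many tokens pass through
# unchanged, e.g. rephrase_text("the quick dog runs") may come back nearly verbatim.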
def capitalize_sentences_and_nouns(text):
    """Function to capitalize the first letter of sentences and proper nouns"""
    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            if token.i == sent.start:  # First word of the sentence
                sentence.append(token.text.capitalize())
            elif token.pos_ == "PROPN":  # Proper noun
                sentence.append(token.text.capitalize())
            else:
                sentence.append(token.text)
        corrected_text.append(' '.join(sentence))
    return ' '.join(corrected_text)
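# Example (assuming the tagger labels the names as PROPN):
#   capitalize_sentences_and_nouns("my friend alice lives in paris.")
#   # -> "My friend Alice lives in Paris ."
# Joining tokens with spaces detaches punctuation; a detokenizer would be
# needed to restore the original spacing.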
def correct_tense_errors(text):
    """Function to correct tense errors by normalizing auxiliary verbs to their base form"""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # spaCy tags auxiliaries as AUX (older models used VERB), so check both.
        if token.pos_ in {"VERB", "AUX"} and token.dep_ in {"aux", "auxpass"}:
            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
            corrected_text.append(lemma)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
def correct_singular_plural_errors(text):
    """Function to correct singular/plural errors"""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.pos_ == "NOUN":
            # Quantifiers and determiners attach to the noun itself in the
            # dependency tree, so inspect the noun's own children.
            modifiers = {child.text.lower() for child in token.children}
            if token.tag_ == "NN" and modifiers & {'many', 'several', 'few'}:
                # Singular noun with a plural quantifier: pluralize
                corrected_text.append(token.lemma_ + 's')
            elif token.tag_ == "NNS" and modifiers & {'a', 'one'}:
                # Plural noun with a singular determiner: use the lemma
                corrected_text.append(token.lemma_)
            else:
                corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
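# Example (assuming the parser attaches the quantifier to the noun):
#   correct_singular_plural_errors("many dog bark")  # -> "many dogs bark"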
def correct_article_errors(text):
    """Function to check and correct article errors"""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # Guard against the final token, which has no right neighbor.
        if token.lower_ in ("a", "an") and token.i + 1 < len(doc):
            next_token = token.nbor(1)
            starts_with_vowel = next_token.text[0].lower() in "aeiou"
            corrected = "an" if starts_with_vowel else "a"
            # Preserve capitalization for sentence-initial articles
            if token.text[0].isupper():
                corrected = corrected.capitalize()
            corrected_text.append(corrected)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
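# Example:
#   correct_article_errors("she ate a apple and an pear")
#   # -> "she ate an apple and a pear"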
def paraphrase_and_correct(text):
    """Function to rephrase the text and then apply grammar corrections"""
    rephrased_text = rephrase_text(text)
    # Capitalize first so sentence starts and proper nouns are restored
    # before the rule-based correction passes run.
    rephrased_text = capitalize_sentences_and_nouns(rephrased_text)
    rephrased_text = correct_article_errors(rephrased_text)
    rephrased_text = correct_tense_errors(rephrased_text)
    rephrased_text = correct_singular_plural_errors(rephrased_text)
    return rephrased_text
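# End-to-end sketch (results vary with the models and WordNet data):
#   paraphrase_and_correct("a apple is better than an pear")
#   # would plausibly yield "An apple is better than a pear" via the
#   # capitalization and article passes.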
# Define Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        t1 = gr.Textbox(label="Input Text", lines=5)
        button1 = gr.Button("Process")
    with gr.Row():
        output_text = gr.Textbox(label="Processed Text", lines=5)
        label1 = gr.Label(label="AI Detection Label")
        score1 = gr.Label(label="AI Detection Score")
    button1.click(
        fn=lambda text: (paraphrase_and_correct(text), *predict_en(text)),
        inputs=[t1],
        outputs=[output_text, label1, score1]
    )
demo.launch()