Spaces:
Running
Running
File size: 6,685 Bytes
b0503ee 84669bc b7577da 7feda08 90fff6b 7fc55d1 6d0ac04 90fff6b 7fc55d1 51568dc 6ba2176 90fff6b 6ba2176 c163eb2 6ba2176 7feda08 90fff6b f036c05 15f7e94 e00f367 90fff6b f79e1dd 90fff6b f79e1dd 90fff6b f79e1dd 90fff6b f79e1dd 90fff6b f79e1dd 90fff6b e00f367 f79e1dd e00f367 f79e1dd e00f367 90fff6b e00f367 90fff6b e00f367 90fff6b e00f367 90fff6b e00f367 de007f1 a456e86 de007f1 a456e86 e00f367 90fff6b e00f367 90fff6b a456e86 90fff6b a456e86 847e3e1 90fff6b c163eb2 90fff6b f79e1dd 90fff6b aed9390 90fff6b 2f3beab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
import os
import subprocess
import sys

import gradio as gr
import nltk
import spacy
from nltk.corpus import wordnet
from transformers import pipeline
# AI Detection: English text-classification pipeline, loaded once at import
# time and shared by every prediction request.
pipeline_en = pipeline(
    task="text-classification",
    model="Hello-SimpleAI/chatgpt-detector-roberta",
)
# Function to predict the label and score for English text (AI Detection)
def predict_en(text):
    """Classify English text with the module-level ``pipeline_en`` detector.

    Args:
        text: The English text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result the pipeline
        returns for ``text``.
    """
    # Truncate to the model's maximum sequence length: without it, input
    # longer than the model accepts makes the pipeline raise instead of
    # returning a prediction for the leading part of the text.
    res = pipeline_en(text, truncation=True)[0]
    return res['label'], res['score']
# Ensure necessary NLTK data is downloaded for Humanifier
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the SpaCy model is installed for Humanifier
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Install with the interpreter that is running this app (a bare "python"
    # on PATH may belong to a different environment), and fail right here
    # with the download's own error if the installation does not succeed.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
# Function to get synonyms using NLTK WordNet (Humanifier)
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset found for ``word``.

    Args:
        word: The word to look up.
        pos: A WordNet part-of-speech constant used to restrict the lookup.

    Returns:
        A list of lemma names from the first matching synset, or an empty
        list when WordNet has no synset for ``word`` with that ``pos``.
    """
    # NOTE(review): the names may include ``word`` itself - confirm whether
    # callers expect it to be filtered out.
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    first_sense = matches[0]
    return [entry.name() for entry in first_sense.lemmas()]
# Updated function to replace words with synonyms while preserving verb forms and pluralization
def _choose_replacement(token, wordnet_pos):
    """Return the text to emit for ``token``: a synonym or its original text."""
    word = token.text
    synonyms = get_synonyms_nltk(word, wordnet_pos)
    if not synonyms:
        return word  # No synonym found, keep the word as is

    # WordNet joins multi-word lemma names with underscores.
    synonym = synonyms[0].replace("_", " ")

    # When the first lemma is just the token's own word/lemma there is
    # nothing to substitute; keeping the surface form avoids stripping the
    # inflection ("running" -> "run") or doubling it ("children" -> "childs").
    if synonym.lower() in (word.lower(), token.lemma_.lower()):
        return word

    if token.pos_ == "VERB":
        # The ``inflect`` token extension is not registered anywhere in this
        # file, so only call it when some add-on has provided it; it may
        # also yield nothing, in which case the original word is kept.
        # NOTE(review): this re-inflects the original token, not the
        # synonym - confirm whether inflecting the synonym was intended.
        inflected = token._.inflect(token.tag_) if token._.has("inflect") else None
        return inflected or word

    if token.pos_ == "NOUN" and token.tag_ == "NNS":
        # Naive pluralisation of the replacement (irregular plurals are not handled).
        synonym += 's'

    # Keep the capitalisation of the word being replaced (e.g. sentence starts).
    if word[:1].isupper():
        synonym = synonym[:1].upper() + synonym[1:]
    return synonym


def replace_with_synonyms(text):
    """Replace verbs, nouns, adjectives and adverbs in ``text`` with synonyms.

    The text is parsed with the module-level ``nlp`` pipeline. Tokens whose
    coarse part of speech is VERB, NOUN, ADJ or ADV are looked up through
    ``get_synonyms_nltk`` and the first returned name is used as the
    candidate replacement; every other token is copied through unchanged.

    Args:
        text: The text to rewrite.

    Returns:
        The rewritten text. Each token keeps the whitespace that followed it
        in the input, so spacing around punctuation is not altered.
    """
    # spaCy coarse POS -> WordNet POS for the categories that get replaced.
    pos_map = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    doc = nlp(text)
    replaced_words = {}
    pieces = []
    for token in doc:
        wordnet_pos = pos_map.get(token.pos_)
        if wordnet_pos is None:
            pieces.append(token.text_with_ws)  # No change for other POS
            continue

        # Cache per (text, POS, tag): the same spelling used as a different
        # part of speech or in a different form must not reuse a replacement
        # chosen for another usage.
        cache_key = (token.text, token.pos_, token.tag_)
        if cache_key not in replaced_words:
            replaced_words[cache_key] = _choose_replacement(token, wordnet_pos)
        pieces.append(replaced_words[cache_key] + token.whitespace_)
    return "".join(pieces)
# Function to capitalize the first letter of sentences and proper nouns (Humanifier)
def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of each sentence and of each proper noun.

    The text is parsed with the module-level ``nlp`` pipeline; sentence
    boundaries and proper nouns (``PROPN``) come from that parse.

    Args:
        text: The text to capitalise.

    Returns:
        The text with only the first character of the affected tokens
        upper-cased. The rest of each token and the whitespace following it
        are kept exactly as in the input.
    """
    doc = nlp(text)
    pieces = []
    for sent in doc.sents:
        for token in sent:
            surface = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Upper-case the first character only: ``str.capitalize()``
                # would also lower-case the rest ("NASA" -> "Nasa").
                surface = surface[:1].upper() + surface[1:]
            pieces.append(surface + token.whitespace_)
    return "".join(pieces)
# Function to paraphrase and correct grammar with stronger synonym usage
def paraphrase_and_correct(text):
    """Run ``text`` through the Humanifier steps and return the final text.

    The steps are applied in a fixed order, each one receiving the output of
    the previous one: capitalisation, synonym replacement, article
    correction, singular/plural correction and tense correction.

    Args:
        text: The text to paraphrase.

    Returns:
        The text produced by the last step.
    """
    # Capitalisation comes first so proper nouns are cased before the
    # remaining rewrite steps run.
    stages = (
        capitalize_sentences_and_nouns,
        replace_with_synonyms,
        correct_article_errors,
        correct_singular_plural_errors,
        correct_tense_errors,
    )
    result = text
    for stage in stages:
        result = stage(result)
    return result
# Correct article errors
def correct_article_errors(text):
    """Fix ``a``/``an`` according to the first letter of the following word.

    Only the lowercase tokens ``a`` and ``an`` are examined. The choice is
    made purely on whether the next non-whitespace token starts with one of
    the letters ``aeiou``; pronunciation is not taken into account.

    Args:
        text: The text to correct.

    Returns:
        The corrected text. Each token keeps the whitespace that followed it
        in the input.
    """
    doc = nlp(text)
    pieces = []
    for token in doc:
        article = token.text
        if article in ("a", "an"):
            # Look past whitespace-only tokens, and tolerate an article that
            # ends the text (there is then nothing to agree with).
            following = next(
                (tok for tok in doc[token.i + 1:] if not tok.is_space),
                None,
            )
            if following is not None:
                starts_with_vowel = following.text[0].lower() in "aeiou"
                article = "an" if starts_with_vowel else "a"
        pieces.append(article + token.whitespace_)
    return "".join(pieces)
# Correct singular/plural errors
def _pluralize(noun):
    """Return a regular English plural of ``noun`` (irregular nouns are not handled)."""
    if noun.endswith(("s", "x", "z", "ch", "sh")):
        return noun + "es"
    if len(noun) > 1 and noun.endswith("y") and noun[-2] not in "aeiou":
        return noun[:-1] + "ies"
    return noun + "s"


def correct_singular_plural_errors(text):
    """Make a noun's number agree with the quantifier attached to it.

    A singular noun (tag ``NN``) modified by "many", "several" or "few" is
    pluralised; a plural noun (tag ``NNS``) modified by "a" or "one" is
    replaced by its lemma, unless one of the plural quantifiers is also
    attached (as in "a few books").

    Args:
        text: The text to correct.

    Returns:
        The corrected text. Each token keeps the whitespace that followed it
        in the input.
    """
    plural_cues = {"many", "several", "few"}
    singular_cues = {"a", "one"}

    doc = nlp(text)
    pieces = []
    for token in doc:
        surface = token.text
        if token.pos_ == "NOUN":
            # Inspect the noun's own dependents. Looking at the children of
            # its head instead would examine the noun's siblings (e.g. the
            # subject of the same verb) and miss modifiers attached to the
            # noun itself.
            modifiers = {child.text.lower() for child in token.children}
            wants_plural = bool(modifiers & plural_cues)
            if token.tag_ == "NN" and wants_plural:  # Singular noun
                surface = _pluralize(token.lemma_)
            elif token.tag_ == "NNS" and not wants_plural and modifiers & singular_cues:  # Plural noun
                surface = token.lemma_
        pieces.append(surface + token.whitespace_)
    return "".join(pieces)
# Correct tense errors in verbs
def correct_tense_errors(text):
    """Replace auxiliary verbs with the base form returned by WordNet.

    A token is rewritten only when the parse labels it ``VERB`` with an
    ``aux`` or ``auxpass`` dependency; it is then replaced by
    ``wordnet.morphy(token.text, wordnet.VERB)`` when that lookup returns a
    value. All other tokens are copied through unchanged.

    Args:
        text: The text to process.

    Returns:
        The processed text. Each token keeps the whitespace that followed it
        in the input.
    """
    # NOTE(review): spaCy models may tag auxiliaries as AUX rather than VERB,
    # in which case the branch below would not fire - confirm against the
    # loaded model before relying on this step.
    doc = nlp(text)
    pieces = []
    for token in doc:
        surface = token.text
        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
            surface = wordnet.morphy(surface, wordnet.VERB) or surface
        pieces.append(surface + token.whitespace_)
    return "".join(pieces)
# Gradio UI: a single Blocks app with one tab per feature.
with gr.Blocks() as demo:
    # Tab 1 - classify the entered text and show the label with its score.
    with gr.Tab("AI Detection"):
        t1 = gr.Textbox(lines=5, label='Text')
        button1 = gr.Button("🤖 Predict!")
        label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
        score1 = gr.Textbox(lines=1, label='Prob')

        # Wire the button to the detector; also exposed under the
        # 'predict_en' API name.
        button1.click(
            fn=predict_en,
            inputs=[t1],
            outputs=[label1, score1],
            api_name='predict_en',
        )

    # Tab 2 - rewrite the entered text with the Humanifier steps.
    with gr.Tab("Humanifier"):
        text_input = gr.Textbox(lines=5, label="Input Text")
        paraphrase_button = gr.Button("Paraphrase & Correct")
        output_text = gr.Textbox(label="Paraphrased Text")

        # Wire the button to the paraphrasing entry point.
        paraphrase_button.click(
            fn=paraphrase_and_correct,
            inputs=text_input,
            outputs=output_text,
        )

# Start serving the app.
demo.launch()
|