Spaces:
Running
Running
File size: 3,820 Bytes
84669bc 7feda08 29edf23 7feda08 6ba2176 7fc55d1 7feda08 85e8aa6 936bfca 7fc55d1 6ba2176 7feda08 c93f011 29edf23 c93f011 5065a5b 85e8aa6 5065a5b 85e8aa6 5065a5b 85e8aa6 5065a5b 85e8aa6 5065a5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from gensim import downloader as api
from nltk.corpus import wordnet
from textblob import TextBlob  # Import TextBlob for grammar correction
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the spaCy model is installed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Run the download with the interpreter executing this script so the
    # model lands in the same environment that spacy.load() reads from;
    # a bare "python" may resolve to a different installation on PATH.
    # check=True surfaces a failed download instead of silently continuing.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load a smaller Word2Vec model from Gensim's pre-trained models
word_vectors = api.load("glove-wiki-gigaword-50")

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
def correct_grammar_with_textblob(text):
    """Return ``text`` after passing it through TextBlob's ``correct()``.

    Args:
        text: The string to correct.

    Returns:
        The corrected text as a plain ``str``.
    """
    return str(TextBlob(text).correct())
def detect_ai_generated(text):
    """Score ``text`` with the loaded sequence classifier and report class 1.

    The text is tokenized (truncated to 512 tokens), run through the
    module-level ``model`` without gradient tracking, and the softmax
    probability of class index 1 is reported as a percentage.

    Args:
        text: The input string to classify.

    Returns:
        A display string such as
        ``"AI-Generated Content Probability: 97.53%"``.
    """
    # NOTE(review): the checkpoint loaded at module level is named
    # "distilbert-base-uncased-finetuned-sst-2-english", which looks like a
    # sentiment model; confirm that class index 1 really means
    # "AI-generated" for the model in use.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()  # Probability in [0, 1]
    # The softmax output is a fraction, so use the percent format spec to
    # scale it by 100; the previous ".2f" + "%" rendered 0.97 as "0.97%".
    return f"AI-Generated Content Probability: {ai_probability:.2%}"
def get_synonyms_nltk(word, pos):
    """Look up ``word`` in WordNet and list the lemmas of its first synset.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech constant passed to ``wordnet.synsets``.

    Returns:
        The lemma names of the first matching synset, or an empty list when
        WordNet returns no synsets for ``word`` with ``pos``.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    first_synset = matches[0]
    return [entry.name() for entry in first_synset.lemmas()]
def paraphrase_with_spacy_nltk(text):
    """Paraphrase ``text`` by swapping content words for WordNet synonyms.

    Each noun, verb, adjective, and adverb found by spaCy is replaced with
    the first lemma returned by ``get_synonyms_nltk`` when that lemma differs
    from the lower-cased token. All other tokens are kept verbatim. The
    result is then passed through ``correct_grammar_with_textblob``.

    Args:
        text: The input string to paraphrase.

    Returns:
        The paraphrased, TextBlob-corrected string.
    """
    # Map spaCy coarse POS tags to WordNet POS constants; tags that are not
    # listed here are never replaced.
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    pieces = []
    for token in nlp(text):
        replacement = token.text
        wordnet_pos = pos_map.get(token.pos_)
        if wordnet_pos is not None:
            lowered = token.text.lower()
            synonyms = get_synonyms_nltk(lowered, wordnet_pos)
            # Replace with a synonym only if it actually differs
            if synonyms and synonyms[0] != lowered:
                # WordNet joins multi-word lemmas with underscores
                # (e.g. "motor_car"); turn them back into spaces.
                replacement = synonyms[0].replace("_", " ")
        # Re-attach the token's own trailing whitespace instead of joining
        # with single spaces, so punctuation and contractions are not
        # separated from the words they belong to ("Hello , world .").
        pieces.append(replacement + token.whitespace_)

    paraphrased_sentence = "".join(pieces)
    # Correct the grammar of the paraphrased sentence using TextBlob
    return correct_grammar_with_textblob(paraphrased_sentence)
# Gradio interface: input and action buttons on the left, output on the right
with gr.Blocks() as interface:
    with gr.Row():
        # Left column: the text to process and one button per action
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK (Grammar Corrected with TextBlob)")
        # Right column: a single output box shared by both actions
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Both buttons read the same input box and write to the same output box
    detect_button.click(
        fn=detect_ai_generated,
        inputs=text_input,
        outputs=output_text,
    )
    paraphrase_button.click(
        fn=paraphrase_with_spacy_nltk,
        inputs=text_input,
        outputs=output_text,
    )

# Launch the Gradio app
interface.launch(debug=False)
|