Spaces:
Running
Running
File size: 2,977 Bytes
29edf23 84669bc 29edf23 30196dc 936bfca 30196dc 936bfca c93f011 29edf23 c93f011 936bfca 4d1390a 10dc1f6 30196dc b3aee5e 4d1390a ea28e08 4d1390a 84669bc 4d1390a 99b3c08 84669bc 4d1390a 29edf23 4d1390a 84669bc 776fa07 84669bc 4d1390a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# Import dependencies
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
import torch
import nltk
from nltk.corpus import wordnet
import subprocess
# Fetch the NLTK resources (tokenizer data, stop words, WordNet); resources
# that are already downloaded are not fetched again.
for _nltk_resource in ("punkt", "stopwords", "wordnet"):
    nltk.download(_nltk_resource)

# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Classifier and tokenizer used by detect_ai_generated.
# NOTE(review): the checkpoint name indicates a DistilBERT model fine-tuned on
# SST-2 (sentiment), not on AI-text detection -- confirm this is the intended
# detector before trusting its scores.
_DETECTOR_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(_DETECTOR_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_DETECTOR_CHECKPOINT)
model = model.to(device)

# Parrot T5 paraphraser and tokenizer used by humanize_text.
_PARAPHRASER_CHECKPOINT = "prithivida/parrot_paraphraser_on_T5"
paraphrase_tokenizer = T5Tokenizer.from_pretrained(_PARAPHRASER_CHECKPOINT)
paraphrase_model = T5ForConditionalGeneration.from_pretrained(_PARAPHRASER_CHECKPOINT)
paraphrase_model = paraphrase_model.to(device)
# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Classify *text* and report the class-1 probability as a percentage.

    The text is tokenized (truncated to 512 tokens), passed through the
    module-level sequence classifier on ``device``, and the softmax
    probability of label index 1 is formatted for display.

    Args:
        text: The raw text entered by the user.

    Returns:
        A string of the form
        ``"AI-Generated Content Probability: 87.31%"``.
    """
    # Inputs beyond the 512-token limit are truncated rather than rejected.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.softmax(outputs.logits, dim=1)

    # Softmax yields a fraction in [0, 1]; scale by 100 so the "%" suffix in
    # the message is accurate (previously 0.87 was displayed as "0.87%").
    # NOTE(review): index 1 is treated as the "AI-generated" class -- confirm
    # this matches the label mapping of the loaded checkpoint.
    ai_probability = probabilities[0][1].item() * 100
    return f"AI-Generated Content Probability: {ai_probability:.2f}%"
# Humanize the AI-detected text using the Parrot Paraphraser model
def humanize_text(AI_text):
    """Paraphrase *AI_text* with the Parrot T5 model and return it for display.

    Args:
        AI_text: The text to rewrite. Input longer than 512 tokens is
            truncated before generation.

    Returns:
        A string of the form ``"Humanized Text:\\n<paraphrase>"``.
    """
    # The upstream Parrot library feeds this checkpoint inputs prefixed with
    # "paraphrase: "; sending the bare text omits the task prompt the model
    # was driven with there.
    prompt = "paraphrase: " + AI_text
    inputs = paraphrase_tokenizer(
        prompt, return_tensors="pt", max_length=512, truncation=True
    ).to(device)

    with torch.no_grad():  # Avoid gradient calculations for faster inference
        paraphrased_ids = paraphrase_model.generate(
            input_ids=inputs["input_ids"],
            # Pass the mask explicitly instead of letting generate() infer it.
            attention_mask=inputs["attention_mask"],
            # Allow the output to be slightly longer than the (prefixed) input.
            max_length=inputs["input_ids"].shape[-1] + 20,
            num_beams=4,
            early_stopping=True,
            length_penalty=1.0,
            no_repeat_ngram_size=3,
        )

    # Only the top beam is returned to the user.
    paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
    return f"Humanized Text:\n{paraphrased_text}"
# Gradio UI: each tab takes one free-text box and renders a plain-text result.
_TEXT_IN_TEXT_OUT = {"inputs": "textbox", "outputs": "text"}

# Tab backed by detect_ai_generated.
ai_detection_interface = gr.Interface(
    fn=detect_ai_generated,
    title="AI Text Detection",
    description="Enter text to determine the probability of it being AI-generated.",
    **_TEXT_IN_TEXT_OUT,
)

# Tab backed by humanize_text.
humanization_interface = gr.Interface(
    fn=humanize_text,
    title="Text Humanizer",
    description="Enter text to get a human-written version, paraphrased for natural output.",
    **_TEXT_IN_TEXT_OUT,
)

# Pair each tab label with its interface, then combine them into one tabbed app.
_tabs = [
    ("AI Detection", ai_detection_interface),
    ("Humanization", humanization_interface),
]
interface = gr.TabbedInterface(
    [tab_ui for _, tab_ui in _tabs],
    [tab_label for tab_label, _ in _tabs],
)

# Start the Gradio server.
interface.launch(debug=False)
|