huamnifierWithSimpleGrammer

Running

File size: 2,977 Bytes

29edf23
84669bc
29edf23
 
 
30196dc
 
936bfca
30196dc
 
 
 
936bfca
c93f011
 
 
29edf23
 
c93f011
936bfca
4d1390a
 
 
10dc1f6
30196dc
 
 
 
 
b3aee5e
4d1390a
 
ea28e08
4d1390a
84669bc
4d1390a
 
 
 
 
 
 
 
 
 
 
 
99b3c08
84669bc
4d1390a
 
29edf23
4d1390a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84669bc
776fa07
84669bc
4d1390a

# Import dependencies
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
import torch
import nltk
from nltk.corpus import wordnet
import subprocess

# Download NLTK data (if not already downloaded): tokenizer models,
# the stop-word lists and the WordNet corpus, fetched in that order.
for _nltk_package in ("punkt", "stopwords", "wordnet"):
    nltk.download(_nltk_package)

# Hugging Face checkpoint identifiers; each one is used for both the
# tokenizer and the model so the pair always stays in sync.
_DETECTOR_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
_PARAPHRASER_CHECKPOINT = "prithivida/parrot_paraphraser_on_T5"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sequence classifier (DistilBERT) and tokenizer behind the detection tab.
# NOTE(review): the checkpoint name says it is an SST-2 fine-tune, i.e.
# presumably a sentiment classifier rather than an AI-text detector —
# confirm this is the intended model.
tokenizer = AutoTokenizer.from_pretrained(_DETECTOR_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_DETECTOR_CHECKPOINT)
model = model.to(device)

# Parrot paraphraser (T5) and tokenizer behind the humanization tab.
paraphrase_tokenizer = T5Tokenizer.from_pretrained(_PARAPHRASER_CHECKPOINT)
paraphrase_model = T5ForConditionalGeneration.from_pretrained(_PARAPHRASER_CHECKPOINT)
paraphrase_model = paraphrase_model.to(device)

# AI detection function using DistilBERT
def detect_ai_generated(text: str) -> str:
    """Classify *text* with the module-level model and report class 1 as a percentage.

    The text is tokenized (truncated to 512 tokens), run through ``model``
    without gradient tracking, and the logits are soft-maxed over the class
    dimension. The probability at class index 1 is reported.

    Args:
        text: Text entered by the user. ``None``, empty and whitespace-only
            values are rejected before the tokenizer is touched.

    Returns:
        ``"AI-Generated Content Probability: NN.NN%"`` for real input, or a
        short prompt asking for text when the input is blank.
    """
    # A blank box would otherwise be scored on special tokens alone, which
    # yields a meaningless number.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**inputs).logits
    probabilities = torch.softmax(logits, dim=1)

    # NOTE(review): index 1 is treated as the "AI-generated" class; the
    # checkpoint loaded at module level is an SST-2 fine-tune, so index 1 is
    # presumably its positive-sentiment label — verify against the model config.
    ai_probability = probabilities[0][1].item()

    # softmax yields a value in [0, 1]; the ``%`` presentation type scales it
    # by 100 so the number matches the percent sign (previously 0.97 was
    # shown as "0.97%").
    return f"AI-Generated Content Probability: {ai_probability:.2%}"

# Humanize the AI-detected text using the Parrot Paraphraser model
def humanize_text(AI_text: str) -> str:
    """Paraphrase *AI_text* with the module-level Parrot T5 model.

    The input is prefixed with the paraphrasing task marker, tokenized
    (truncated to 512 tokens) and decoded with 4-beam search. Only the
    top-ranked beam is returned.

    Args:
        AI_text: Text entered by the user. ``None``, empty and
            whitespace-only values are rejected before the tokenizer is
            touched.

    Returns:
        ``"Humanized Text:\\n<paraphrase>"`` for real input, or a short
        prompt asking for text when the input is blank.
    """
    if not AI_text or not AI_text.strip():
        return "Please enter some text to humanize."

    # The Parrot library feeds this checkpoint inputs of the form
    # "paraphrase: <text>"; without the task prefix the T5 model is not
    # being asked the question it was fine-tuned on.
    prompt = "paraphrase: " + AI_text
    inputs = paraphrase_tokenizer(
        prompt, return_tensors="pt", max_length=512, truncation=True
    ).to(device)
    input_ids = inputs["input_ids"]

    with torch.no_grad():  # Avoid gradient calculations for faster inference
        paraphrased_ids = paraphrase_model.generate(
            input_ids=input_ids,
            # Pass the mask explicitly instead of letting generate() guess it.
            attention_mask=inputs["attention_mask"],
            # Allow the output to be slightly longer than the input.
            max_length=input_ids.shape[-1] + 20,
            num_beams=4,
            early_stopping=True,
            length_penalty=1.0,
            no_repeat_ngram_size=3,
        )

    paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
    return f"Humanized Text:\n{paraphrased_text}"

# Gradio interface definitions: each tab is a single text box in and a
# plain-text result out, wired to one of the functions above.

# Detection tab -> detect_ai_generated
ai_detection_interface = gr.Interface(
    title="AI Text Detection",
    description="Enter text to determine the probability of it being AI-generated.",
    fn=detect_ai_generated,
    inputs="textbox",
    outputs="text",
)

# Humanization tab -> humanize_text
humanization_interface = gr.Interface(
    title="Text Humanizer",
    description="Enter text to get a human-written version, paraphrased for natural output.",
    fn=humanize_text,
    inputs="textbox",
    outputs="text",
)

# Bundle the two interfaces into one tabbed app; tab labels are matched
# to the interfaces by position.
interface = gr.TabbedInterface(
    interface_list=[ai_detection_interface, humanization_interface],
    tab_names=["AI Detection", "Humanization"],
)

# Start the Gradio server with debug mode off.
interface.launch(debug=False)