"""Gradio demo: classifier score plus two embedding-based paraphrasers.

The script loads its models at import time, defines three text-processing
callbacks, wires them to buttons in a Gradio Blocks UI, and launches the app.
"""

# Import dependencies
import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from gensim import downloader as api
from gensim.models import KeyedVectors
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Download NLTK data (if not already downloaded).
# Newer NLTK releases load the tokenizer tables from 'punkt_tab' instead of
# 'punkt'; requesting both keeps word_tokenize working across versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Ensure the spaCy model is installed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the running interpreter (not whatever "python" is on PATH) so the
    # model lands in the same environment; check=True surfaces a failed
    # download here instead of as a second, less obvious OSError below.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load a small set of pre-trained word vectors through Gensim's downloader
word_vectors = api.load("glove-wiki-gigaword-50")

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the classifier model and tokenizer from Hugging Face (DistilBERT).
# NOTE(review): the checkpoint name says it is fine-tuned on SST-2 (sentiment),
# so class index 1 is presumably "positive sentiment", not "AI-generated".
# Confirm, and swap in a real AI-text detector checkpoint if that is the intent.
DETECTOR_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(DETECTOR_MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(DETECTOR_MODEL_NAME).to(device)

# Part-of-speech tags that the spaCy paraphraser is allowed to replace
REPLACEABLE_POS = frozenset({"NOUN", "VERB", "ADJ", "ADV"})


# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Score *text* with the loaded classifier and report class 1 as a percentage.

    The input is truncated to 512 tokens, run through the model without
    gradient tracking, and the softmax probability at class index 1 is
    formatted into the returned message.

    Args:
        text: The text to score.

    Returns:
        A string of the form ``"AI-Generated Content Probability: 87.00%"``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()  # softmax output in [0, 1]
    # The '%' presentation type multiplies by 100; the previous ':.2f' + '%'
    # printed the raw fraction (0.87 showed up as "0.87%").
    return f"AI-Generated Content Probability: {ai_probability:.2%}"


# Function to get synonyms using the Gensim word vectors
def get_synonyms_gensim(word):
    """Return up to five nearest neighbours of *word* in the loaded vectors.

    Args:
        word: The lookup key, expected in the casing used by the vocabulary
            (callers in this file lowercase it first).

    Returns:
        A list of neighbour words ordered as ``most_similar`` returns them,
        or an empty list when the lookup raises ``KeyError``.
    """
    try:
        neighbours = word_vectors.most_similar(positive=[word], topn=5)
    except KeyError:
        return []
    return [neighbour for neighbour, _score in neighbours]


# Paraphrasing function using Gensim for synonym replacement
def paraphrase_with_gensim(text):
    """Replace each alphabetic NLTK token of *text* with its nearest neighbour.

    Tokens that are not purely alphabetic (punctuation, numbers) and tokens
    with no neighbour are kept unchanged, mirroring the way the spaCy variant
    leaves non-content tokens alone.

    Args:
        text: The text to paraphrase.

    Returns:
        The resulting tokens joined by single spaces.
    """
    paraphrased_words = []
    for word in word_tokenize(text):
        # Only look up real words; anything else is passed through untouched.
        synonyms = get_synonyms_gensim(word.lower()) if word.isalpha() else []
        paraphrased_words.append(synonyms[0] if synonyms else word)
    return ' '.join(paraphrased_words)


# Paraphrasing function using spaCy for synonym replacement
def paraphrase_with_spacy(text):
    """Replace nouns, verbs, adjectives and adverbs with their nearest neighbour.

    Args:
        text: The text to paraphrase.

    Returns:
        The resulting tokens joined by single spaces. Tokens with any other
        part-of-speech tag, or with no neighbour, are kept unchanged.
    """
    doc = nlp(text)
    paraphrased_words = []
    for token in doc:
        # Check the POS tag first so the vector lookup is skipped for tokens
        # that would never be replaced anyway.
        synonyms = (
            get_synonyms_gensim(token.text.lower())
            if token.pos_ in REPLACEABLE_POS
            else []
        )
        paraphrased_words.append(synonyms[0] if synonyms else token.text)
    return ' '.join(paraphrased_words)


# Gradio interface definition
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_gensim_button = gr.Button("Paraphrase with Gensim")
            paraphrase_spacy_button = gr.Button("Paraphrase with spaCy")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # All three buttons read the same input box and write to the same output box.
    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_gensim_button.click(paraphrase_with_gensim, inputs=text_input, outputs=output_text)
    paraphrase_spacy_button.click(paraphrase_with_spacy, inputs=text_input, outputs=output_text)

# Launch the Gradio app
interface.launch(debug=False)