# Import dependencies
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import nltk
from nltk.tokenize import word_tokenize
import spacy
from gensim import downloader as api

# Download NLTK tokenizer data (if not already downloaded)
nltk.download('punkt')
nltk.download('punkt_tab')  # required by word_tokenize on newer NLTK releases

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load a small pre-trained GloVe embedding model via Gensim's downloader (fetched on first use)
word_vectors = api.load("glove-wiki-gigaword-50")

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
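# Note: this checkpoint is DistilBERT fine-tuned on SST-2 sentiment data, used here as a
# stand-in binary classifier rather than a purpose-built AI-text detector.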
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

# AI detection function using DistilBERT
def detect_ai_generated(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()  # probability of the positive class, interpreted as "AI-generated"
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"

# Function to get synonym candidates using the GloVe vectors (nearest neighbours in embedding space)
def get_synonyms_gensim(word):
    try:
        synonyms = word_vectors.most_similar(positive=[word], topn=5)
        return [synonym[0] for synonym in synonyms]
    except KeyError:
        return []

# Paraphrasing function using Gensim for synonym replacement
def paraphrase_with_gensim(text):
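    # Note: every in-vocabulary token (including stop words and punctuation) is swapped for its
    # nearest neighbour, so replacements are approximate rather than true synonyms.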
    words = word_tokenize(text)
    paraphrased_words = []
    for word in words:
        synonyms = get_synonyms_gensim(word.lower())
        if synonyms:
            paraphrased_words.append(synonyms[0])
        else:
            paraphrased_words.append(word)
    return ' '.join(paraphrased_words)

# Paraphrasing function using spaCy POS tags to guide Gensim synonym replacement
def paraphrase_with_spacy(text):
    doc = nlp(text)
    paraphrased_words = []
    for token in doc:
        synonyms = get_synonyms_gensim(token.text.lower())
        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:  # Only replace certain types of words
            paraphrased_words.append(synonyms[0])
        else:
            paraphrased_words.append(token.text)
    return ' '.join(paraphrased_words)

# Gradio interface definition
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_gensim_button = gr.Button("Paraphrase with Gensim")
            paraphrase_spacy_button = gr.Button("Paraphrase with spaCy")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_gensim_button.click(paraphrase_with_gensim, inputs=text_input, outputs=output_text)
    paraphrase_spacy_button.click(paraphrase_with_spacy, inputs=text_input, outputs=output_text)

# Launch the Gradio app
interface.launch(debug=False)