sashtech committed on
Commit
5065a5b
·
verified ·
1 Parent(s): e675ad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -1
app.py CHANGED
@@ -5,7 +5,6 @@ import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
- import language_check # Use language-check instead of language-tool-python
9
  from gensim import downloader as api
10
 
11
  # Ensure necessary NLTK data is downloaded
@@ -28,3 +27,66 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
  # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
29
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
30
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
 
8
  from gensim import downloader as api
9
 
10
  # Ensure necessary NLTK data is downloaded
 
27
  # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
28
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
29
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
30
+
31
# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Return a human-readable estimate of how likely *text* is AI-generated.

    Relies on the module-level `tokenizer`, `model`, and `device` (DistilBERT
    SST-2 checkpoint loaded earlier in this file).

    NOTE(review): the checkpoint is a sentiment classifier, so treating class 1
    as "AI-generated" is an assumption to confirm against the model card.

    Parameters:
        text: input string to score.

    Returns:
        A formatted string with the class-1 probability as a percentage.
    """
    # Truncate to the model's 512-token limit and move tensors onto `device`.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():  # inference only — skip gradient bookkeeping
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    # Fix: the softmax value is in [0, 1]; scale to a percentage before
    # formatting with a '%' suffix (previously misreported by 100x).
    ai_probability = probabilities[0][1].item() * 100
    return f"AI-Generated Content Probability: {ai_probability:.2f}%"
39
+
40
# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset for `word`.

    Parameters:
        word: the (lowercased) word to look up.
        pos: a WordNet POS constant (wordnet.NOUN/VERB/ADJ/ADV).

    Returns:
        A list of synonym strings, or [] when WordNet has no synset for
        the word with that part of speech.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    # Fix: WordNet encodes multiword lemmas with underscores (e.g.
    # "look_into"); convert them to spaces so replacements read naturally.
    return [lemma.name().replace('_', ' ') for lemma in synsets[0].lemmas()]
47
+
48
# Paraphrasing function using spaCy and NLTK (without grammar correction)
def paraphrase_with_spacy_nltk(text):
    """Paraphrase `text` by swapping content words for WordNet synonyms.

    Tokenizes with the module-level spaCy pipeline `nlp`; for each noun,
    verb, adjective, or adverb, substitutes the first WordNet synonym when
    it differs from the original word. All other tokens pass through.

    Parameters:
        text: input sentence or passage.

    Returns:
        The paraphrased text with the original inter-token spacing.
    """
    # Dispatch table mapping spaCy coarse POS tags to WordNet POS constants.
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    doc = nlp(text)
    pieces = []
    for token in doc:
        pos = pos_map.get(token.pos_)
        synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []

        # Replace only when a synonym exists and actually differs from the
        # original word (compared case-insensitively).
        if synonyms and synonyms[0] != token.text.lower():
            # Guard against underscore-joined multiword lemmas from WordNet.
            replacement = synonyms[0].replace('_', ' ')
        else:
            replacement = token.text

        # Fix: keep each token's original trailing whitespace instead of
        # ' '.join(...), which inserted spurious spaces before punctuation
        # ("Hello , world .").
        pieces.append(replacement + token.whitespace_)

    return ''.join(pieces)
77
+
78
# Gradio UI: input box and two action buttons on the left, shared output on the right.
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            user_text = gr.Textbox(lines=5, label="Input Text")
            btn_detect = gr.Button("AI Detection")
            btn_paraphrase = gr.Button("Paraphrase with spaCy & NLTK")
        with gr.Column():
            result_box = gr.Textbox(label="Output")

    # Wire each button to its handler; both write into the same output box.
    btn_detect.click(detect_ai_generated, inputs=user_text, outputs=result_box)
    btn_paraphrase.click(paraphrase_with_spacy_nltk, inputs=user_text, outputs=result_box)

# Launch the Gradio app
interface.launch(debug=False)