sashtech committed on
Commit
031a20c
·
verified ·
1 Parent(s): 6f0ffd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  import spacy
5
  import subprocess
@@ -11,7 +11,7 @@ from gensim import downloader as api
11
  nltk.download('wordnet')
12
  nltk.download('omw-1.4')
13
 
14
- # Ensure the spaCy model is installed
15
  try:
16
  nlp = spacy.load("en_core_web_sm")
17
  except OSError:
@@ -24,18 +24,21 @@ word_vectors = api.load("glove-wiki-gigaword-50")
24
  # Check for GPU and set the device accordingly
25
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
 
27
- # Load AI Detector model and tokenizer from Hugging Face (e.g., GPT-Neo)
28
- tokenizer_ai = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
29
- model_ai = AutoModelForSequenceClassification.from_pretrained("EleutherAI/gpt-neo-2.7B").to(device)
30
 
31
- # AI detection function using GPT-Neo-based model
32
  def detect_ai_generated(text):
33
  inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
34
  with torch.no_grad():
35
  outputs = model_ai(**inputs)
36
- probabilities = torch.softmax(outputs.logits, dim=1)
37
- ai_probability = probabilities[0][1].item() * 100 # Probability of being AI-generated
38
- return f"AI-Generated Content Probability: {ai_probability:.2f}%"
 
 
 
39
 
40
  # Function to get synonyms using NLTK WordNet
41
  def get_synonyms_nltk(word, pos):
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
3
  import torch
4
  import spacy
5
  import subprocess
 
11
  nltk.download('wordnet')
12
  nltk.download('omw-1.4')
13
 
14
+ # Ensure the SpaCy model is installed
15
  try:
16
  nlp = spacy.load("en_core_web_sm")
17
  except OSError:
 
24
  # Check for GPU and set the device accordingly
25
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
 
27
+ # Load GPT-3.5-turbo model and tokenizer from Hugging Face
28
+ tokenizer_ai = AutoTokenizer.from_pretrained("Xenova/gpt-3.5-turbo")
29
+ model_ai = AutoModel.from_pretrained("Xenova/gpt-3.5-turbo").to(device)
30
 
31
+ # AI detection function using GPT-3.5-turbo-based model
32
  def detect_ai_generated(text):
33
  inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
34
  with torch.no_grad():
35
  outputs = model_ai(**inputs)
36
+ # Since this model does not directly output classification logits, you'll need to process the hidden states
37
+ # For simplicity, let's just use the first hidden state for now (you may need to adjust based on your use case)
38
+ hidden_state = outputs.last_hidden_state[:, 0, :] # Use the first token's representation
39
+ # Example: calculate some kind of score based on the hidden state
40
+ score = torch.mean(hidden_state).item()
41
+ return f"AI-Generated Content Score: {score:.2f}"
42
 
43
  # Function to get synonyms using NLTK WordNet
44
  def get_synonyms_nltk(word, pos):