sashtech committed on
Commit
6b18ba5
·
verified ·
1 Parent(s): 59f9d5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -35
app.py CHANGED
@@ -1,20 +1,16 @@
1
  # Import dependencies
2
  import gradio as gr
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import torch
5
  import nltk
6
  from nltk.corpus import wordnet
7
- from gensim.models import KeyedVectors
8
- from nltk.tokenize import word_tokenize
9
 
10
  # Download NLTK data (if not already downloaded)
11
  nltk.download('punkt')
12
  nltk.download('stopwords')
13
  nltk.download('wordnet') # Download WordNet
14
 
15
- # Load Word2Vec model from Gensim
16
- word_vectors = KeyedVectors.load_word2vec_format('path/to/GoogleNews-vectors-negative300.bin.gz', binary=True, limit=100000) # Adjust path as needed
17
-
18
  # Check for GPU and set the device accordingly
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
 
@@ -22,25 +18,9 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
23
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
24
 
25
- # Function to get synonyms using Gensim Word2Vec
26
- def get_synonyms_gensim(word):
27
- try:
28
- synonyms = word_vectors.most_similar(positive=[word], topn=5)
29
- return [synonym[0] for synonym in synonyms]
30
- except KeyError:
31
- return []
32
-
33
- # Paraphrasing function using Gensim for synonym replacement
34
- def paraphrase_text(text):
35
- words = word_tokenize(text)
36
- paraphrased_words = []
37
- for word in words:
38
- synonyms = get_synonyms_gensim(word.lower())
39
- if synonyms:
40
- paraphrased_words.append(synonyms[0])
41
- else:
42
- paraphrased_words.append(word)
43
- return ' '.join(paraphrased_words)
44
 
45
  # AI detection function using DistilBERT
46
  def detect_ai_generated(text):
@@ -51,18 +31,43 @@ def detect_ai_generated(text):
51
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
52
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  # Gradio interface definition
55
- with gr.Blocks() as interface:
56
- with gr.Row():
57
- with gr.Column():
58
- text_input = gr.Textbox(lines=5, label="Input Text")
59
- detect_button = gr.Button("AI Detection")
60
- paraphrase_button = gr.Button("Paraphrase Text")
61
- with gr.Column():
62
- output_text = gr.Textbox(label="Output")
 
 
 
 
 
 
 
63
 
64
- detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
65
- paraphrase_button.click(paraphrase_text, inputs=text_input, outputs=output_text)
 
 
 
66
 
67
  # Launch the Gradio app
68
  interface.launch(debug=False)
 
1
  # Import dependencies
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
4
  import torch
5
  import nltk
6
  from nltk.corpus import wordnet
7
+ import subprocess
 
8
 
9
  # Download NLTK data (if not already downloaded)
10
  nltk.download('punkt')
11
  nltk.download('stopwords')
12
  nltk.download('wordnet') # Download WordNet
13
 
 
 
 
14
  # Check for GPU and set the device accordingly
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
 
 
18
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
19
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
20
 
21
+ # Load Parrot Paraphraser model and tokenizer for humanizing text
22
+ paraphrase_tokenizer = T5Tokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5")
23
+ paraphrase_model = T5ForConditionalGeneration.from_pretrained("prithivida/parrot_paraphraser_on_T5").to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # AI detection function using DistilBERT
26
  def detect_ai_generated(text):
 
31
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
32
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
33
 
34
+ # Humanize the AI-detected text using the Parrot Paraphraser model
35
def humanize_text(AI_text):
    """Paraphrase the given text with the Parrot T5 model for display.

    Args:
        AI_text: Text to rewrite. The tokenizer truncates it to 512 tokens.

    Returns:
        A string made of the ``Humanized Text:`` header line followed by the
        first returned sequence decoded with special tokens removed. For
        empty, whitespace-only or missing input the part after the header is
        empty and the model is not invoked.
    """
    # Nothing to paraphrase: skip tokenization and generation entirely rather
    # than asking the model to generate from an input with no real text.
    if not AI_text or not AI_text.strip():
        return "Humanized Text:\n"

    # NOTE(review): the Parrot library appears to prepend "paraphrase: " to
    # its inputs before encoding; confirm whether this model expects that
    # prefix here as well.
    inputs = paraphrase_tokenizer(
        AI_text, return_tensors="pt", max_length=512, truncation=True
    ).to(device)
    input_ids = inputs["input_ids"]

    with torch.no_grad():  # Avoid gradient calculations for faster inference
        paraphrased_ids = paraphrase_model.generate(
            input_ids,
            # Pass the mask the tokenizer produced instead of letting
            # generate() infer one from the input ids.
            attention_mask=inputs["attention_mask"],
            # Allow slightly more tokens than the (truncated) input has.
            max_length=input_ids.shape[-1] + 20,
            num_beams=4,
            early_stopping=True,
            length_penalty=1.0,
            no_repeat_ngram_size=3,
        )

    paraphrased_text = paraphrase_tokenizer.decode(
        paraphrased_ids[0], skip_special_tokens=True
    )
    return f"Humanized Text:\n{paraphrased_text}"
48
+
49
  # Gradio interface definition
50
+ ai_detection_interface = gr.Interface(
51
+ fn=detect_ai_generated,
52
+ inputs="textbox",
53
+ outputs="text",
54
+ title="AI Text Detection",
55
+ description="Enter text to determine the probability of it being AI-generated."
56
+ )
57
+
58
+ humanization_interface = gr.Interface(
59
+ fn=humanize_text,
60
+ inputs="textbox",
61
+ outputs="text",
62
+ title="Text Humanizer",
63
+ description="Enter text to get a human-written version, paraphrased for natural output."
64
+ )
65
 
66
+ # Combine both interfaces into a single Gradio app with tabs
67
+ interface = gr.TabbedInterface(
68
+ [ai_detection_interface, humanization_interface],
69
+ ["AI Detection", "Humanization"]
70
+ )
71
 
72
  # Launch the Gradio app
73
  interface.launch(debug=False)