sashtech committed on
Commit
4d1390a
·
verified ·
1 Parent(s): b3aee5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -72
app.py CHANGED
@@ -3,7 +3,6 @@ import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
4
  import torch
5
  import nltk
6
- import spacy
7
  from nltk.corpus import wordnet
8
  import subprocess
9
 
@@ -12,13 +11,6 @@ nltk.download('punkt')
12
  nltk.download('stopwords')
13
  nltk.download('wordnet') # Download WordNet
14
 
15
- # Download spaCy model if not already installed
16
- try:
17
- nlp = spacy.load("en_core_web_sm")
18
- except OSError:
19
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
20
- nlp = spacy.load("en_core_web_sm")
21
-
22
  # Check for GPU and set the device accordingly
23
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24
 
@@ -26,32 +18,9 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
27
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
28
 
29
- # Load SRDdev Paraphrase model and tokenizer for humanizing text
30
- paraphrase_tokenizer = T5Tokenizer.from_pretrained("SRDdev/Paraphrase")
31
- paraphrase_model = T5ForConditionalGeneration.from_pretrained("SRDdev/Paraphrase").to(device)
32
-
33
- # Function to find synonyms using WordNet via NLTK
34
- def get_synonyms(word):
35
- synonyms = set()
36
- for syn in wordnet.synsets(word):
37
- for lemma in syn.lemmas():
38
- synonyms.add(lemma.name())
39
- return list(synonyms)
40
-
41
- # Replace words with synonyms using spaCy and WordNet
42
- def replace_with_synonyms(text):
43
- doc = nlp(text)
44
- processed_text = []
45
- for token in doc:
46
- synonyms = get_synonyms(token.text.lower())
47
- if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}: # Only replace certain types of words
48
- replacement = synonyms[0] # Replace with the first synonym
49
- if token.is_title:
50
- replacement = replacement.capitalize()
51
- processed_text.append(replacement)
52
- else:
53
- processed_text.append(token.text)
54
- return " ".join(processed_text)
55
 
56
  # AI detection function using DistilBERT
57
  def detect_ai_generated(text):
@@ -59,49 +28,46 @@ def detect_ai_generated(text):
59
  with torch.no_grad():
60
  outputs = model(**inputs)
61
  probabilities = torch.softmax(outputs.logits, dim=1)
62
- return probabilities[0][1].item() # Probability of being AI-generated
 
63
 
64
- # Humanize the AI-detected text using the SRDdev Paraphrase model
65
  def humanize_text(AI_text):
66
- paragraphs = AI_text.split("\n")
67
- paraphrased_paragraphs = []
68
- for paragraph in paragraphs:
69
- if paragraph.strip():
70
- inputs = paraphrase_tokenizer(paragraph, return_tensors="pt", max_length=512, truncation=True).to(device)
71
- with torch.no_grad(): # Avoid gradient calculations for faster inference
72
- paraphrased_ids = paraphrase_model.generate(
73
- inputs['input_ids'],
74
- max_length=inputs['input_ids'].shape[-1] + 20, # Slightly more than the original input length
75
- num_beams=4,
76
- early_stopping=True,
77
- length_penalty=1.0,
78
- no_repeat_ngram_size=3,
79
- )
80
- paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
81
- paraphrased_paragraphs.append(paraphrased_text)
82
- return "\n\n".join(paraphrased_paragraphs)
83
-
84
- # Main function to handle the overall process
85
- def main_function(AI_text):
86
- # Replace words with synonyms
87
- text_with_synonyms = replace_with_synonyms(AI_text)
88
-
89
- # Detect AI-generated content
90
- ai_probability = detect_ai_generated(text_with_synonyms)
91
-
92
- # Humanize AI text
93
- humanized_text = humanize_text(text_with_synonyms)
94
-
95
- return f"AI-Generated Content: {ai_probability:.2f}%\n\nHumanized Text:\n{humanized_text}"
96
 
97
  # Gradio interface definition
98
- interface = gr.Interface(
99
- fn=main_function,
100
  inputs="textbox",
101
- outputs="textbox",
102
- title="AI Text Humanizer with Synonym Replacement",
103
- description="Enter AI-generated text and get a human-written version, with synonyms replaced for more natural output. This space uses models from Hugging Face directly."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  )
105
 
106
  # Launch the Gradio app
107
- interface.launch(debug=False) # Turn off debug mode for production
 
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
4
  import torch
5
  import nltk
 
6
  from nltk.corpus import wordnet
7
  import subprocess
8
 
 
11
  nltk.download('stopwords')
12
  nltk.download('wordnet') # Download WordNet
13
 
 
 
 
 
 
 
 
14
  # Check for GPU and set the device accordingly
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
 
 
18
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
19
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
20
 
21
+ # Load Parrot Paraphraser model and tokenizer for humanizing text
22
+ paraphrase_tokenizer = T5Tokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5")
23
+ paraphrase_model = T5ForConditionalGeneration.from_pretrained("prithivida/parrot_paraphraser_on_T5").to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # AI detection function using DistilBERT
26
  def detect_ai_generated(text):
 
28
  with torch.no_grad():
29
  outputs = model(**inputs)
30
  probabilities = torch.softmax(outputs.logits, dim=1)
31
+ ai_probability = probabilities[0][1].item() # Probability of being AI-generated
32
+ return f"AI-Generated Content Probability: {ai_probability:.2f}%"
33
 
34
+ # Humanize the AI-detected text using the Parrot Paraphraser model
35
  def humanize_text(AI_text):
36
+ inputs = paraphrase_tokenizer(AI_text, return_tensors="pt", max_length=512, truncation=True).to(device)
37
+ with torch.no_grad(): # Avoid gradient calculations for faster inference
38
+ paraphrased_ids = paraphrase_model.generate(
39
+ inputs['input_ids'],
40
+ max_length=inputs['input_ids'].shape[-1] + 20, # Slightly more than the original input length
41
+ num_beams=4,
42
+ early_stopping=True,
43
+ length_penalty=1.0,
44
+ no_repeat_ngram_size=3,
45
+ )
46
+ paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
47
+ return f"Humanized Text:\n{paraphrased_text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  # Gradio interface definition
50
+ ai_detection_interface = gr.Interface(
51
+ fn=detect_ai_generated,
52
  inputs="textbox",
53
+ outputs="text",
54
+ title="AI Text Detection",
55
+ description="Enter text to determine the probability of it being AI-generated."
56
+ )
57
+
58
+ humanization_interface = gr.Interface(
59
+ fn=humanize_text,
60
+ inputs="textbox",
61
+ outputs="text",
62
+ title="Text Humanizer",
63
+ description="Enter text to get a human-written version, paraphrased for natural output."
64
+ )
65
+
66
+ # Combine both interfaces into a single Gradio app with tabs
67
+ interface = gr.TabbedInterface(
68
+ [ai_detection_interface, humanization_interface],
69
+ ["AI Detection", "Humanization"]
70
  )
71
 
72
  # Launch the Gradio app
73
+ interface.launch(debug=False)