Shujaat Ali committed
Commit 84ec915 · verified · 1 parent: c93f011

Update app.py

Files changed (1)
  1. app.py +3 -72
app.py CHANGED
@@ -3,8 +3,6 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
 import torch
 import nltk
-import random
-import string
 
 # Download NLTK data (if not already downloaded)
 nltk.download('punkt')
@@ -30,71 +28,6 @@ def detect_ai_generated(text):
     ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
     return ai_probability
 
-# Random text transformations to simulate human-like errors
-def random_capitalize(word):
-    if word.isalpha() and random.random() < 0.1:
-        return word.capitalize()
-    return word
-
-def random_remove_punctuation(text):
-    if random.random() < 0.2:
-        text = list(text)
-        indices = [i for i, c in enumerate(text) if c in string.punctuation]
-        if indices:
-            remove_indices = random.sample(indices, min(3, len(indices)))
-            for idx in sorted(remove_indices, reverse=True):
-                text.pop(idx)
-        return ''.join(text)
-    return text
-
-def random_double_period(text):
-    if random.random() < 0.2:
-        text = text.replace('.', '..', 3)
-    return text
-
-def random_double_space(text):
-    if random.random() < 0.2:
-        words = text.split()
-        for _ in range(min(3, len(words) - 1)):
-            idx = random.randint(0, len(words) - 2)
-            words[idx] += ' '
-        return ' '.join(words)
-    return text
-
-def random_replace_comma_space(text, period_replace_percentage=0.33):
-    comma_occurrences = text.count(", ")
-    period_occurrences = text.count(". ")
-    replace_count_comma = max(1, comma_occurrences // 3)
-    replace_count_period = max(1, period_occurrences // 3)
-    comma_indices = [i for i in range(len(text)) if text.startswith(", ", i)]
-    period_indices = [i for i in range(len(text)) if text.startswith(". ", i)]
-    replace_indices_comma = random.sample(comma_indices, min(replace_count_comma, len(comma_indices)))
-    replace_indices_period = random.sample(period_indices, min(replace_count_period, len(period_indices)))
-    for idx in sorted(replace_indices_comma + replace_indices_period, reverse=True):
-        if text.startswith(", ", idx):
-            text = text[:idx] + " ," + text[idx + 2:]
-        if text.startswith(". ", idx):
-            text = text[:idx] + " ." + text[idx + 2:]
-    return text
-
-def transform_paragraph(paragraph):
-    words = paragraph.split()
-    if len(words) > 12:
-        words = [random_capitalize(word) for word in words]
-        transformed_paragraph = ' '.join(words)
-        transformed_paragraph = random_remove_punctuation(transformed_paragraph)
-        transformed_paragraph = random_double_period(transformed_paragraph)
-        transformed_paragraph = random_double_space(transformed_paragraph)
-        transformed_paragraph = random_replace_comma_space(transformed_paragraph)
-    else:
-        transformed_paragraph = paragraph
-    return transformed_paragraph
-
-def transform_text(text):
-    paragraphs = text.split('\n')
-    transformed_paragraphs = [transform_paragraph(paragraph) for paragraph in paragraphs]
-    return '\n'.join(transformed_paragraphs)
-
 # Humanize the AI-detected text using the SRDdev Paraphrase model
 def humanize_text(AI_text):
     paragraphs = AI_text.split("\n")
@@ -116,14 +49,12 @@ def humanize_text(AI_text):
 
 # Main function to handle the overall process
 def main_function(AI_text):
-    ai_probabilities = [detect_ai_generated(sentence) for sentence in nltk.sent_tokenize(AI_text)]
-    ai_generated_percentage = sum([1 for prob in ai_probabilities if prob > 0.5]) / len(ai_probabilities) * 100
+    ai_probability = detect_ai_generated(AI_text)
 
-    # Transform AI text to make it more human-like
+    # Humanize AI text
     humanized_text = humanize_text(AI_text)
-    humanized_text = transform_text(humanized_text)  # Add randomness to simulate human errors
 
-    return f"AI-Generated Content: {ai_generated_percentage:.2f}%\n\nHumanized Text:\n{humanized_text}"
+    return f"AI-Generated Content: {ai_probability:.2f}%\n\nHumanized Text:\n{humanized_text}"
 
 # Gradio interface definition
 interface = gr.Interface(
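
Note: the diff only shows fragments of the two helpers that the simplified main_function depends on. The sketch below is a hypothetical reconstruction of how detect_ai_generated and humanize_text could be wired, based on the classes imported at the top of app.py; the checkpoint names, the "paraphrase:" task prefix, and the generation settings are assumptions, not taken from this commit.

import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    T5Tokenizer,
    T5ForConditionalGeneration,
)

# Hypothetical checkpoints -- app.py only imports the classes above.
DETECTOR_CHECKPOINT = "roberta-base-openai-detector"  # assumed AI-text detector
PARAPHRASER_CHECKPOINT = "SRDdev/Paraphrase"  # assumed from the "SRDdev Paraphrase model" comment

detector_tokenizer = AutoTokenizer.from_pretrained(DETECTOR_CHECKPOINT)
detector_model = AutoModelForSequenceClassification.from_pretrained(DETECTOR_CHECKPOINT)
paraphrase_tokenizer = T5Tokenizer.from_pretrained(PARAPHRASER_CHECKPOINT)
paraphrase_model = T5ForConditionalGeneration.from_pretrained(PARAPHRASER_CHECKPOINT)

def detect_ai_generated(text):
    # Binary classifier: index 1 is read as the "AI-generated" label, matching
    # the probabilities[0][1].item() context line visible in the diff.
    inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = detector_model(**inputs).logits
    probabilities = torch.softmax(logits, dim=-1)
    return probabilities[0][1].item()

def humanize_text(AI_text):
    # Paraphrase paragraph by paragraph, as the visible AI_text.split("\n") line suggests.
    humanized = []
    for paragraph in AI_text.split("\n"):
        if not paragraph.strip():
            humanized.append(paragraph)
            continue
        inputs = paraphrase_tokenizer(
            "paraphrase: " + paragraph,  # task prefix is an assumption
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )
        output_ids = paraphrase_model.generate(**inputs, max_length=512, num_beams=4)
        humanized.append(paraphrase_tokenizer.decode(output_ids[0], skip_special_tokens=True))
    return "\n".join(humanized)

One caveat visible in the new return statement: detect_ai_generated yields a probability in [0, 1], but main_function now formats it directly with a percent sign, so a value of 0.87 renders as "0.87%"; multiplying by 100 before formatting would be needed for the figure to read as a true percentage.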
 
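The diff's last context line stops at the opening of the Gradio interface definition. A minimal wiring consistent with the new single-input, single-output main_function might look like the sketch below; the component types, labels, title, and launch call are hypothetical, not part of this commit.

import gradio as gr

interface = gr.Interface(
    fn=main_function,                              # defined above in app.py
    inputs=gr.Textbox(lines=10, label="AI text"),  # assumed input component
    outputs=gr.Textbox(lines=15, label="Result"),  # assumed output component
    title="AI Text Detector & Humanizer",          # assumed title
)

if __name__ == "__main__":
    interface.launch()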