ali committed
Commit 29edf23 · verified · 1 Parent(s): cbfc0d6

Update app.py

Files changed (1)
  1. app.py +68 -109
app.py CHANGED
@@ -1,26 +1,33 @@
-# import dependencies
+# Import dependencies
 import gradio as gr
-from openai import OpenAI
-import os
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
+import torch
+import nltk
 import random
 import string
 
-# define the openai key
-api_key = os.getenv("OPENAI_API_KEY")
+# Download NLTK data (if not already downloaded)
+nltk.download('punkt')
+nltk.download('stopwords')
 
-# make an instance of the openai client
-client = OpenAI(api_key = api_key)
+# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
+tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 
-# finetuned model instance
-finetuned_model = "ft:gpt-3.5-turbo-0125:noaigpt::9yy0fWeK"
+# Load SRDdev Paraphrase model and tokenizer for humanizing text
+paraphrase_tokenizer = T5Tokenizer.from_pretrained("SRDdev/Paraphrase")
+paraphrase_model = T5ForConditionalGeneration.from_pretrained("SRDdev/Paraphrase")
 
+# AI detection function using DistilBERT
+def detect_ai_generated(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probabilities = torch.softmax(outputs.logits, dim=1)
+    ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
+    return ai_probability
 
-
-
-
-
-
-# text processing functions
+# Random text transformations to simulate human-like errors
 def random_capitalize(word):
     if word.isalpha() and random.random() < 0.1:
         return word.capitalize()
@@ -52,37 +59,25 @@ def random_double_space(text):
     return text
 
 def random_replace_comma_space(text, period_replace_percentage=0.33):
-
-    # Count occurrences
-    comma_occurrences = text.count(", ")
-    period_occurrences = text.count(". ")
-
-    # Replacements
-    replace_count_comma = max(1, comma_occurrences // 3)
-    replace_count_period = max(1, period_occurrences // 3)
-
-    # Find indices
-    comma_indices = [i for i in range(len(text)) if text.startswith(", ", i)]
-    period_indices = [i for i in range(len(text)) if text.startswith(". ", i)]
-
-    # Sample indices
-    replace_indices_comma = random.sample(comma_indices, min(replace_count_comma, len(comma_indices)))
-    replace_indices_period = random.sample(period_indices, min(replace_count_period, len(period_indices)))
-
-    # Apply replacements
-    for idx in sorted(replace_indices_comma + replace_indices_period, reverse=True):
-        if text.startswith(", ", idx):
-            text = text[:idx] + " ," + text[idx + 2:]
-        if text.startswith(". ", idx):
-            text = text[:idx] + " ." + text[idx + 2:]
-
-    return text
+    comma_occurrences = text.count(", ")
+    period_occurrences = text.count(". ")
+    replace_count_comma = max(1, comma_occurrences // 3)
+    replace_count_period = max(1, period_occurrences // 3)
+    comma_indices = [i for i in range(len(text)) if text.startswith(", ", i)]
+    period_indices = [i for i in range(len(text)) if text.startswith(". ", i)]
+    replace_indices_comma = random.sample(comma_indices, min(replace_count_comma, len(comma_indices)))
+    replace_indices_period = random.sample(period_indices, min(replace_count_period, len(period_indices)))
+    for idx in sorted(replace_indices_comma + replace_indices_period, reverse=True):
+        if text.startswith(", ", idx):
+            text = text[:idx] + " ," + text[idx + 2:]
+        if text.startswith(". ", idx):
+            text = text[:idx] + " ." + text[idx + 2:]
+    return text
 
 def transform_paragraph(paragraph):
     words = paragraph.split()
     if len(words) > 12:
         words = [random_capitalize(word) for word in words]
-
         transformed_paragraph = ' '.join(words)
         transformed_paragraph = random_remove_punctuation(transformed_paragraph)
         transformed_paragraph = random_double_period(transformed_paragraph)
@@ -90,11 +85,6 @@ def transform_paragraph(paragraph):
         transformed_paragraph = random_replace_comma_space(transformed_paragraph)
     else:
        transformed_paragraph = paragraph
-
-    transformed_paragraph = transformed_paragraph.replace("#", "*")
-    transformed_paragraph = transformed_paragraph.replace("*", "")
-    # transformed_paragraph = transformed_paragraph.replace(", ", " ,")
-
    return transformed_paragraph
 
 def transform_text(text):
@@ -102,75 +92,44 @@ def transform_text(text):
     transformed_paragraphs = [transform_paragraph(paragraph) for paragraph in paragraphs]
     return '\n'.join(transformed_paragraphs)
 
-# function to humanize text
+# Humanize the AI-detected text using the SRDdev Paraphrase model
 def humanize_text(AI_text):
-    """Humanizes the provided AI text using the fine-tuned model."""
-    response = client.chat.completions.create(
-        model=finetuned_model,
-        temperature = 0.87,
-        messages=[
-            {"role": "system", "content": """
-            You are a text humanizer.
-            You humanize AI generated text.
-            The text must appear like humanly written.
-            THE INPUT AND THE OUTPUT HEADINGS MUST BE SAME. NO HEADING SHOULD BE MISSED.
-            NAMES LIKE NOVEL NAME SHOULD REMAIN INTACT WITHOUT ANY CHANGE.
-            THE INPUT AND THE OUTPUT SHOULD HAVE SAME WORD COUNT.
-            THE OUTPUT SENTENCES MUST NOT BE SIMPLE. THEY SHOULD BE COMPOUND, COMPLEX, OR COMPOUND COMPLEX.
-            ABOVE ALL, THE GRAMMAR AND THE SENSE OF THE SENTENCES MUST BE TOP NOTCH - DO NOT COMPROMISE ON THAT."""},
-            {"role": "system", "content": "YOU ARE TEXT HUMANIZER BUT YOU DO NOT REDUCE THE LENGTH OF THE SENTENCES. YOUR OUTPUT SENTENCES ARE OF EXACTLY THE SAME LENGTH AS THE INPUT"},
-            {"role": "user", "content": f"THE LANGUAGE OF THE INPUT AND THE OUTPUT MUST BE SAME. THE SENTENCES SHOULD NOT BE SHORT LENGTH - THEY SHOULD BE SAME AS IN THE INPUT. ALSO THE PARAGRAPHS SHOULD NOT BE SHORT EITHER - PARAGRAPHS MUST HAVE THE SAME LENGTH"},
-            {"role": "user", "content": f"THE GRAMMAR AND THE QUALITY OF THE SENTENCES MUST BE TOP NOTCH - EASY TO UNDERSTAND AND NO GRAMMATICAL ERRORS."},
-            {"role": "user", "content": "Use as many conjunctions and punctuations to make the sentence long. COMPOUND, COMPLEX, OR COMPOUND COMPLEX sentences are required"},
-            {"role": "user", "content": f"Humanize the text. Keep the output format i.e. the bullets and the headings as it is. THE GRAMMAR MUST BE TOP NOTCH WITH NO ERRORS AND EASY TO UNDERSTAND!!!!. \nTEXT: {AI_text}"}
-        ]
-    )
-
-    return response.choices[0].message.content.strip()
-
-
-
+    paragraphs = AI_text.split("\n")
+    paraphrased_paragraphs = []
+    for paragraph in paragraphs:
+        if paragraph.strip():
+            inputs = paraphrase_tokenizer(paragraph, return_tensors="pt", max_length=512, truncation=True)
+            paraphrased_ids = paraphrase_model.generate(
+                inputs['input_ids'],
+                max_length=inputs['input_ids'].shape[-1] + 20,  # Slightly more than the original input length
+                num_beams=4,
+                early_stopping=True,
+                length_penalty=1.0,
+                no_repeat_ngram_size=3,
+            )
+            paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
+            paraphrased_paragraphs.append(paraphrased_text)
+    return "\n\n".join(paraphrased_paragraphs)
+
+# Main function to handle the overall process
 def main_function(AI_text):
+    ai_probabilities = [detect_ai_generated(sentence) for sentence in nltk.sent_tokenize(AI_text)]
+    ai_generated_percentage = sum([1 for prob in ai_probabilities if prob > 0.5]) / len(ai_probabilities) * 100
+
+    # Transform AI text to make it more human-like
     humanized_text = humanize_text(AI_text)
-    # humanized_text= transform_text(humanized_text)
-    return humanized_text
-
-
-
+    humanized_text = transform_text(humanized_text)  # Add randomness to simulate human errors
+
+    return f"AI-Generated Content: {ai_generated_percentage:.2f}%\n\nHumanized Text:\n{humanized_text}"
 
 # Gradio interface definition
 interface = gr.Interface(
-    fn=main_function,
-    inputs="textbox",
-    outputs="textbox",
-    title="AI Text Humanizer",
-    description="Enter AI-generated text and get a human-written version. This space is availabe for limited time only so contact farhan.[email protected] to put this application in production.",
+    fn=main_function,
+    inputs="textbox",
+    outputs="textbox",
+    title="AI Text Humanizer",
+    description="Enter AI-generated text and get a human-written version. This space uses models from Hugging Face directly."
 )
 
-
 # Launch the Gradio app
-interface.launch(debug = True)
-
-
-
-
-
-
-
-# import gradio as gr
-
-# # Function to handle text submission
-# def contact_info(text):
-#     return "Contact [email protected] for Humanizer Application service"
-
-# # Gradio interface definition
-# interface = gr.Interface(
-#     fn=contact_info,
-#     inputs="textbox",
-#     outputs="text",
-#     title="AI TEXT HUMANIZER",
-#     description="Enter AI text and get its humanizer equivalent"
-# )
-
-# # Launch the Gradio app
-# if __name__ == "__main__":
-#     interface.launch()
+interface.launch(debug=True)
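A quick way to sanity-check this revision: with the app from this commit running locally (`python app.py` serves the Gradio interface on the default port), the single `gr.Interface` endpoint can be driven programmatically. Below is a minimal sketch using `gradio_client`; the local URL and the sample text are illustrative assumptions, not part of the commit.

# Sketch: call the updated humanizer endpoint programmatically.
# Assumes the app from this commit is serving at Gradio's default local
# address; for a hosted Space, pass its "user/space-name" id instead.
from gradio_client import Client

client = Client("http://127.0.0.1:7860")  # assumed local URL

sample = (
    "Artificial intelligence has transformed many industries. "  # made-up input
    "It enables faster decision making and reduces manual effort."
)

# A plain gr.Interface exposes one endpoint, conventionally /predict.
result = client.predict(sample, api_name="/predict")
print(result)  # "AI-Generated Content: NN.NN%" followed by the humanized text

Since main_function returns a single formatted string, the detection percentage and the rewritten text arrive together; splitting them into separate outputs would be a natural follow-up change.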