sashtech committed · verified
Commit af4412c · 1 Parent(s): 2bc5696

Update app.py

Files changed (1): app.py (+34, -47)
app.py CHANGED
@@ -27,9 +27,6 @@ download_nltk_resources()
 
 top_words = set(stopwords.words("english"))  # More efficient as a set
 
-import os
-import json
-
 # Path to the thesaurus file
 thesaurus_file_path = 'en_thesaurus.jsonl'  # Ensure the file path is correct
 
@@ -53,9 +50,33 @@ def load_thesaurus(file_path):
 # Load the thesaurus
 synonym_dict = load_thesaurus(thesaurus_file_path)
 
+# Words and POS tags we don't want to replace
+exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
+exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
+
+# Initialize the English text classification pipeline for AI detection
+pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
+
+# Initialize the spell checker
+spell = SpellChecker()
+
+# Ensure the SpaCy model is installed
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
+    nlp = spacy.load("en_core_web_sm")
+
+# Function to predict the label and score for English text (AI Detection)
+def predict_en(text):
+    try:
+        res = pipeline_en(text)[0]
+        return res['label'], res['score']
+    except Exception as e:
+        return f"Error during AI detection: {e}"
+
 # Modified plagiarism_remover function to use the loaded thesaurus
 def plagiarism_remover(word):
-    # Handle stopwords, punctuation, and excluded words
     if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
         return word
 
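Note: the body of load_thesaurus is unchanged by this commit and elided from the diff. A minimal sketch of a JSONL loader of the shape this code appears to expect — the 'word' and 'synonyms' key names are assumptions, not confirmed by the diff:

    import json

    def load_thesaurus(file_path):
        # Hypothetical sketch: build {word -> set of synonyms} from a JSONL file.
        # The actual key names in en_thesaurus.jsonl are not shown in this commit.
        synonym_dict = {}
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    entry = json.loads(line)
                    word = entry.get('word', '').lower()
                    if word:
                        synonym_dict.setdefault(word, set()).update(entry.get('synonyms', []))
        except FileNotFoundError:
            pass  # plagiarism_remover then falls back to WordNet lookups
        return synonym_dict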
@@ -66,54 +87,25 @@ def plagiarism_remover(word):
     if not synonyms:
         for syn in wordnet.synsets(word):
             for lemma in syn.lemmas():
-                # Exclude overly technical synonyms or words with underscores
                 if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
                     synonyms.add(lemma.name())
 
-    # Get part of speech for word and filter synonyms with the same POS
     pos_tag_word = nltk.pos_tag([word])[0]
 
-    # Avoid replacing certain parts of speech
     if pos_tag_word[1] in exclude_tags:
         return word
 
     filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
 
-    # Return original word if no appropriate synonyms found
     if not filtered_synonyms:
         return word
 
-    # Select a random synonym from the filtered list
     synonym_choice = random.choice(filtered_synonyms)
 
-    # Retain original capitalization
     if word.istitle():
         return synonym_choice.title()
     return synonym_choice
 
-
-# Words we don't want to replace
-exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
-exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
-
-# Initialize the English text classification pipeline for AI detection
-pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
-
-# Initialize the spell checker
-spell = SpellChecker()
-
-# Ensure the SpaCy model is installed
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-    nlp = spacy.load("en_core_web_sm")
-
-# Function to predict the label and score for English text (AI Detection)
-def predict_en(text):
-    res = pipeline_en(text)[0]
-    return res['label'], res['score']
-
 # Function to remove redundant and meaningless words
 def remove_redundant_words(text):
     doc = nlp(text)
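For reference, plagiarism_remover works on one token at a time, and its output is nondeterministic because of random.choice. A quick illustration, assuming the NLTK corpora and taggers are already downloaded:

    print(plagiarism_remover("the"))    # "the"  -- stopword, returned unchanged
    print(plagiarism_remover(","))      # ","    -- punctuation, returned unchanged
    print(plagiarism_remover("Quick"))  # e.g. "Speedy" -- random same-POS synonym, title case preserved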
@@ -123,7 +115,6 @@ def remove_redundant_words(text):
 
 # Function to fix spacing before punctuation
 def fix_punctuation_spacing(text):
-    # Split the text into words and punctuation
     words = text.split(' ')
     cleaned_words = []
     punctuation_marks = {',', '.', "'", '!', '?', ':'}
@@ -139,8 +130,7 @@ def fix_punctuation_spacing(text):
 
 # Function to fix possessives like "Earth's"
 def fix_possessives(text):
-    text = re.sub(r'(\w)\s\'\s?s', r"\1's", text)
-    return text
+    return re.sub(r'(\w)\s\'\s?s', r"\1's", text)
 
 # Function to capitalize the first letter of sentences and proper nouns
 def capitalize_sentences_and_nouns(text):
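The collapse of fix_possessives to a one-liner is behavior-preserving; the regex stitches a spaced-out possessive back onto its noun:

    import re

    def fix_possessives(text):
        # (\w)\s\'\s?s matches a word character, a space, an apostrophe,
        # an optional space, then "s" -- i.e. "Earth ' s" or "Earth 's".
        return re.sub(r'(\w)\s\'\s?s', r"\1's", text)

    print(fix_possessives("Earth ' s crust"))  # "Earth's crust"
    print(fix_possessives("Earth 's crust"))   # "Earth's crust"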
@@ -216,18 +206,15 @@ def ensure_subject_verb_agreement(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
-# Function to correct spelling errors
 # Function to correct spelling errors
 def correct_spelling(text):
     words = text.split()
     corrected_words = []
     for word in words:
         corrected_word = spell.correction(word)
-        # If correction returns None, keep the original word
         corrected_words.append(corrected_word if corrected_word is not None else word)
     return ' '.join(corrected_words)
 
-
 # Main processing function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
     cleaned_text = remove_redundant_words(text)
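The None guard in correct_spelling matters because recent pyspellchecker releases return None when no correction candidate exists, rather than echoing the input:

    from spellchecker import SpellChecker

    spell = SpellChecker()
    print(spell.correction("helo"))    # a real suggestion, e.g. "help" or "hello"
    print(spell.correction("qzxwvk"))  # None -- the guard keeps the original word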
@@ -239,7 +226,7 @@ def paraphrase_and_correct(text):
     cleaned_text = correct_article_errors(cleaned_text)
     cleaned_text = ensure_subject_verb_agreement(cleaned_text)
     cleaned_text = correct_spelling(cleaned_text)
-    plag_removed = plagiarism_removal(cleaned_text)
+    plag_removed = plagiarism_remover(cleaned_text)
     return plag_removed
 
 # Create the Gradio interface
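This rename makes the call match the function actually defined above. Note, though, that plagiarism_remover takes a single word, so passing the whole cleaned_text treats the text as one token; if whole-text substitution is the intent, a hypothetical wrapper along these lines would apply it token by token:

    def plagiarism_removal(text):
        # Hypothetical helper, not part of this commit: run the per-word
        # remover across every whitespace-separated token.
        return ' '.join(plagiarism_remover(token) for token in text.split())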
@@ -247,15 +234,15 @@ with gr.Blocks() as demo:
     gr.Markdown("# AI Text Processor")
     with gr.Tab("AI Detection"):
         t1 = gr.Textbox(lines=5, label='Input Text')
-        output1 = gr.Label()
-        button1 = gr.Button("🚀 Process!")
-        button1.click(fn=predict_en, inputs=t1, outputs=output1)
+        btn1 = gr.Button("Detect AI")
+        out1 = gr.Textbox(label='Prediction', interactive=False)
+        out2 = gr.Textbox(label='Confidence', interactive=False)
+        btn1.click(fn=predict_en, inputs=t1, outputs=[out1, out2])
 
     with gr.Tab("Paraphrasing and Grammar Correction"):
         t2 = gr.Textbox(lines=5, label='Input Text')
-        button2 = gr.Button("🚀 Process!")
-        output2 = gr.Textbox(lines=5, label='Processed Text')
-
-        button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=output2)
+        btn2 = gr.Button("Process Text")
+        out3 = gr.Textbox(label='Processed Text', interactive=False)
+        btn2.click(fn=paraphrase_and_correct, inputs=t2, outputs=out3)
 
 demo.launch()
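One caveat with the new two-box layout: predict_en returns a (label, score) pair on success but a single error string on failure, while btn1.click wires two outputs. A sketch that keeps the two-output contract in both branches:

    def predict_en(text):
        # Sketch only: always return two values so both Gradio boxes are filled.
        try:
            res = pipeline_en(text)[0]
            return res['label'], f"{res['score']:.4f}"
        except Exception as e:
            return "Error", f"AI detection failed: {e}"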
 