sashtech committed · Commit f294823 · verified · 1 Parent(s): ffbdb95

Update app.py

Files changed (1): app.py (+31 -11)
app.py CHANGED
@@ -19,7 +19,6 @@ nltk.download('averaged_perceptron_tagger')
 nltk.download('averaged_perceptron_tagger_eng')
 nltk.download('wordnet')
 nltk.download('omw-1.4')
-nltk.download('punkt_tab')
 
 # Initialize stopwords
 stop_words = set(stopwords.words("english"))
@@ -41,11 +40,12 @@ except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
 
+
 def plagiarism_removal(text):
     def plagiarism_remover(word):
         if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
             return word
-
+
         # Find synonyms
         synonyms = set()
         for syn in wordnet.synsets(word):
@@ -57,7 +57,7 @@ def plagiarism_removal(text):
 
         if pos_tag_word[1] in exclude_tags:
             return word
-
+
         filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
 
         if not filtered_synonyms:
@@ -71,26 +71,29 @@ def plagiarism_removal(text):
 
     para_split = word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
-
+
     corrected_text = []
     for i in range(len(final_text)):
         if final_text[i] in string.punctuation and i > 0:
-            corrected_text[-1] += final_text[i]
+            corrected_text[-1] += final_text[i]
         else:
             corrected_text.append(final_text[i])
 
     return " ".join(corrected_text)
 
+
 def predict_en(text):
     res = pipeline_en(text)[0]
     return res['label'], res['score']
 
+
 def remove_redundant_words(text):
     doc = nlp(text)
     meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
     filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
     return ' '.join(filtered_text)
 
+
 def fix_punctuation_spacing(text):
     words = text.split(' ')
     cleaned_words = []
@@ -103,12 +106,14 @@ def fix_punctuation_spacing(text):
         cleaned_words.append(word)
 
     return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
-        .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
+        .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
+
 
 def fix_possessives(text):
     text = re.sub(r'(\w)\s\'\s?s', r"\1's", text)
     return text
 
+
 def capitalize_sentences_and_nouns(text):
     doc = nlp(text)
     corrected_text = []
@@ -126,30 +131,41 @@ def capitalize_sentences_and_nouns(text):
 
     return ' '.join(corrected_text)
 
+
 def force_first_letter_capital(text):
     sentences = re.split(r'(?<=\w[.!?])\s+', text)
     capitalized_sentences = []
-
+
     for sentence in sentences:
         if sentence:
             capitalized_sentence = sentence[0].capitalize() + sentence[1:]
             if not re.search(r'[.!?]$', capitalized_sentence):
                 capitalized_sentence += '.'
             capitalized_sentences.append(capitalized_sentence)
-
+
     return " ".join(capitalized_sentences)
 
+
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
-        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
-            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
-            corrected_text.append(lemma)
+        if token.pos_ == "VERB":
+            tense = token.morph.get("Tense")
+            if tense:
+                if 'Past' in tense:
+                    corrected_text.append(token.lemma_ + "ed")
+                elif 'Present' in tense and token.tag_ == 'VBZ':
+                    corrected_text.append(token.lemma_ + "s")
+                else:
+                    corrected_text.append(token.lemma_)
+            else:
+                corrected_text.append(token.text)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
+
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
@@ -166,6 +182,7 @@ def correct_article_errors(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
+
 def ensure_subject_verb_agreement(text):
     doc = nlp(text)
     corrected_text = []
@@ -178,6 +195,7 @@ def ensure_subject_verb_agreement(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
+
 def correct_spelling(text):
     words = text.split()
     corrected_words = []
@@ -189,6 +207,7 @@ def correct_spelling(text):
         corrected_words.append(word)
     return ' '.join(corrected_words)
 
+
 def paraphrase_and_correct(text):
     paragraphs = text.split("\n\n")  # Split by paragraphs
 
@@ -209,6 +228,7 @@ def paraphrase_and_correct(text):
 
     return "\n\n".join(processed_paragraphs)  # Reassemble the text with paragraphs
 
+
 # Gradio app setup
 with gr.Blocks() as demo:
     with gr.Tab("AI Detection"):
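For context, the synonym-filtering step inside plagiarism_remover can be exercised in isolation. A minimal sketch, assuming the NLTK resources downloaded at the top of app.py are available; the input word "quick" is purely illustrative:

import nltk
from nltk.corpus import wordnet

# Same resources app.py fetches at startup.
nltk.download('wordnet', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

word = "quick"  # illustrative input
pos_tag_word = nltk.pos_tag([word])[0]  # e.g. ('quick', 'JJ')

# Collect WordNet lemma names, as in the "Find synonyms" block above.
synonyms = {lemma.name() for syn in wordnet.synsets(word) for lemma in syn.lemmas()}

# Keep only candidates whose POS tag matches the original word's,
# mirroring the filtered_synonyms comprehension in the diff.
filtered_synonyms = [s for s in synonyms if nltk.pos_tag([s])[0][1] == pos_tag_word[1]]
print(filtered_synonyms)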
 
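The substantive change in this commit is the rewritten correct_tense_errors, which switches from dependency-based auxiliary detection to spaCy morphological features. A standalone sketch of that logic, assuming en_core_web_sm is installed; note that spaCy reports the Universal Dependencies value 'Pres' (not 'Present') for present tense, so the sketch checks 'Pres' where the committed code checks 'Present':

import spacy

nlp = spacy.load("en_core_web_sm")

def correct_tense_errors(text):
    corrected_text = []
    for token in nlp(text):
        if token.pos_ == "VERB":
            tense = token.morph.get("Tense")  # e.g. ['Past'], ['Pres'], or []
            if 'Past' in tense:
                corrected_text.append(token.lemma_ + "ed")  # naive: wrong for irregular verbs
            elif 'Pres' in tense and token.tag_ == 'VBZ':   # UD value is 'Pres', not 'Present'
                corrected_text.append(token.lemma_ + "s")
            elif tense:
                corrected_text.append(token.lemma_)
            else:
                corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)

# Regular verbs round-trip cleanly; irregular past forms degrade (e.g. "ran" -> "runed").
print(correct_tense_errors("She walked home and he walks fast."))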