sashtech committed on
Commit
2174db5
·
verified ·
1 Parent(s): 051de31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -26
app.py CHANGED
@@ -4,22 +4,25 @@ from transformers import pipeline
4
  import spacy
5
  import subprocess
6
  import nltk
7
- from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
  import re
 
 
10
 
11
-
12
-
13
  nltk.download('punkt')
14
  nltk.download('stopwords')
15
  nltk.download('averaged_perceptron_tagger')
16
  nltk.download('wordnet')
 
 
17
  top_words = set(stopwords.words("english")) # More efficient as a set
18
 
19
  def plagiarism_removal(text):
20
  def plagiarism_remover(word):
21
  # Handle stopwords, punctuation, and excluded words
22
- if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
23
  return word
24
 
25
  # Find synonyms
@@ -52,7 +55,7 @@ def plagiarism_removal(text):
52
  return synonym_choice
53
 
54
  # Tokenize, replace words, and join them back
55
- para_split = word_tokenize(text)
56
  final_text = [plagiarism_remover(word) for word in para_split]
57
 
58
  # Handle spacing around punctuation correctly
@@ -65,12 +68,6 @@ def plagiarism_removal(text):
65
 
66
  return " ".join(corrected_text)
67
 
68
-
69
-
70
-
71
-
72
-
73
-
74
  # Words we don't want to replace
75
  exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
76
  exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
@@ -81,10 +78,6 @@ pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt
81
  # Initialize the spell checker
82
  spell = SpellChecker()
83
 
84
- # Ensure necessary NLTK data is downloaded
85
- nltk.download('wordnet')
86
- nltk.download('omw-1.4')
87
-
88
  # Ensure the SpaCy model is installed
89
  try:
90
  nlp = spacy.load("en_core_web_sm")
@@ -211,14 +204,10 @@ def correct_spelling(text):
211
  corrected_words.append(word)
212
  return ' '.join(corrected_words)
213
 
214
-
215
-
216
-
217
  # Main function for paraphrasing and grammar correction
218
  def paraphrase_and_correct(text):
219
- # Add synonym replacement here
220
  cleaned_text = remove_redundant_words(text)
221
- plag_removed=plagiarism_removal(cleaned_text)
222
  paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
223
  paraphrased_text = force_first_letter_capital(paraphrased_text)
224
  paraphrased_text = correct_article_errors(paraphrased_text)
@@ -240,11 +229,11 @@ with gr.Blocks() as demo:
240
 
241
  button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
242
 
243
- with gr.Tab("Paraphrasing & Grammar Correction"):
244
- t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
245
- button2 = gr.Button("🔄 Paraphrase and Correct")
246
- result2 = gr.Textbox(lines=5, label='Corrected Text')
247
 
248
- button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
249
 
250
- demo.launch(share=True)
 
4
  import spacy
5
  import subprocess
6
  import nltk
7
+ from nltk.corpus import wordnet, stopwords # Import stopwords here
8
  from spellchecker import SpellChecker
9
  import re
10
+ import random
11
+ import string
12
 
13
+ # Ensure necessary NLTK data is downloaded
 
14
  nltk.download('punkt')
15
  nltk.download('stopwords')
16
  nltk.download('averaged_perceptron_tagger')
17
  nltk.download('wordnet')
18
+ nltk.download('omw-1.4')
19
+
20
  top_words = set(stopwords.words("english")) # More efficient as a set
21
 
22
  def plagiarism_removal(text):
23
  def plagiarism_remover(word):
24
  # Handle stopwords, punctuation, and excluded words
25
+ if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
26
  return word
27
 
28
  # Find synonyms
 
55
  return synonym_choice
56
 
57
  # Tokenize, replace words, and join them back
58
+ para_split = nltk.word_tokenize(text)
59
  final_text = [plagiarism_remover(word) for word in para_split]
60
 
61
  # Handle spacing around punctuation correctly
 
68
 
69
  return " ".join(corrected_text)
70
 
 
 
 
 
 
 
71
  # Words we don't want to replace
72
  exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
73
  exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
 
78
  # Initialize the spell checker
79
  spell = SpellChecker()
80
 
 
 
 
 
81
  # Ensure the SpaCy model is installed
82
  try:
83
  nlp = spacy.load("en_core_web_sm")
 
204
  corrected_words.append(word)
205
  return ' '.join(corrected_words)
206
 
 
 
 
207
  # Main function for paraphrasing and grammar correction
208
  def paraphrase_and_correct(text):
 
209
  cleaned_text = remove_redundant_words(text)
210
+ plag_removed = plagiarism_removal(cleaned_text)
211
  paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
212
  paraphrased_text = force_first_letter_capital(paraphrased_text)
213
  paraphrased_text = correct_article_errors(paraphrased_text)
 
229
 
230
  button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
231
 
232
+ with gr.Tab("Paraphrasing and Grammar Correction"):
233
+ t2 = gr.Textbox(lines=5, label='Input Text')
234
+ button2 = gr.Button("🚀 Process!")
235
+ output2 = gr.Textbox(lines=5, label='Processed Text')
236
 
237
+ button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=output2)
238
 
239
+ demo.launch()