sashtech committed on
Commit
bba85cd
·
verified ·
1 Parent(s): 8bddbf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -39
app.py CHANGED
@@ -5,18 +5,39 @@ import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
- from nltk.corpus import stopwords
 
9
  from spellchecker import SpellChecker
10
  import re
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
 
 
13
 
14
- nltk.download('punkt')
15
- nltk.download('stopwords')
16
- nltk.download('averaged_perceptron_tagger')
17
- nltk.download('wordnet')
18
- nltk.download('stopwords')
19
- top_words = set(stopwords.words("english"))
20
 
21
  def plagiarism_removal(text):
22
  def plagiarism_remover(word):
@@ -67,33 +88,6 @@ def plagiarism_removal(text):
67
 
68
  return " ".join(corrected_text)
69
 
70
-
71
-
72
-
73
-
74
-
75
-
76
- # Words we don't want to replace
77
- exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
78
- exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
79
-
80
- # Initialize the English text classification pipeline for AI detection
81
- pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
82
-
83
- # Initialize the spell checker
84
- spell = SpellChecker()
85
-
86
- # Ensure necessary NLTK data is downloaded
87
- nltk.download('wordnet')
88
- nltk.download('omw-1.4')
89
-
90
- # Ensure the SpaCy model is installed
91
- try:
92
- nlp = spacy.load("en_core_web_sm")
93
- except OSError:
94
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
95
- nlp = spacy.load("en_core_web_sm")
96
-
97
  # Function to predict the label and score for English text (AI Detection)
98
  def predict_en(text):
99
  res = pipeline_en(text)[0]
@@ -213,14 +207,11 @@ def correct_spelling(text):
213
  corrected_words.append(word)
214
  return ' '.join(corrected_words)
215
 
216
-
217
-
218
-
219
  # Main function for paraphrasing and grammar correction
220
  def paraphrase_and_correct(text):
221
- # Add synonym replacement here
222
  cleaned_text = remove_redundant_words(text)
223
- plag_removed=plagiarism_removal(cleaned_text)
224
  paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
225
  paraphrased_text = force_first_letter_capital(paraphrased_text)
226
  paraphrased_text = correct_article_errors(paraphrased_text)
 
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import word_tokenize
10
  from spellchecker import SpellChecker
11
  import re
12
+ import string
13
+ import random
14
 
15
+ # Download necessary NLTK data
16
+ nltk.download('punkt')
17
+ nltk.download('stopwords')
18
+ nltk.download('averaged_perceptron_tagger')
19
+ nltk.download('wordnet')
20
+ nltk.download('omw-1.4')
21
+
22
+ # Initialize stopwords
23
+ stop_words = set(stopwords.words("english"))
24
+
25
+ # Words we don't want to replace
26
+ exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
27
+ exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
28
+
29
+ # Initialize the English text classification pipeline for AI detection
30
+ pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
31
 
32
+ # Initialize the spell checker
33
+ spell = SpellChecker()
34
 
35
+ # Ensure the SpaCy model is installed
36
+ try:
37
+ nlp = spacy.load("en_core_web_sm")
38
+ except OSError:
39
+ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
40
+ nlp = spacy.load("en_core_web_sm")
41
 
42
  def plagiarism_removal(text):
43
  def plagiarism_remover(word):
 
88
 
89
  return " ".join(corrected_text)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # Function to predict the label and score for English text (AI Detection)
92
  def predict_en(text):
93
  res = pipeline_en(text)[0]
 
207
  corrected_words.append(word)
208
  return ' '.join(corrected_words)
209
 
 
 
 
210
  # Main function for paraphrasing and grammar correction
211
  def paraphrase_and_correct(text):
212
+ # Add synonym replacement here
213
  cleaned_text = remove_redundant_words(text)
214
+ plag_removed = plagiarism_removal(cleaned_text)
215
  paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
216
  paraphrased_text = force_first_letter_capital(paraphrased_text)
217
  paraphrased_text = correct_article_errors(paraphrased_text)