sashtech committed on
Commit
15f7e94
·
verified ·
1 Parent(s): cb8dab7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -52
app.py CHANGED
@@ -5,7 +5,6 @@ import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
- from collections import defaultdict
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -29,12 +28,10 @@ except OSError:
29
  # Function to get synonyms using NLTK WordNet (Humanifier)
30
  def get_synonyms_nltk(word, pos):
31
  synsets = wordnet.synsets(word, pos=pos)
32
- synonyms = set()
33
- for synset in synsets:
34
- for lemma in synset.lemmas():
35
- if lemma.name() != word:
36
- synonyms.add(lemma.name())
37
- return list(synonyms)
38
 
39
  # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
40
  def capitalize_sentences_and_nouns(text):
@@ -54,69 +51,53 @@ def capitalize_sentences_and_nouns(text):
54
 
55
  return ' '.join(corrected_text)
56
 
57
- # Function to correct tense errors in a sentence (Tense Correction)
58
  def correct_tense_errors(text):
59
  doc = nlp(text)
60
  corrected_text = []
61
-
62
  for token in doc:
63
  if token.pos_ == "VERB":
64
- # Check if verb is in its base form
65
- if token.tag_ == "VB" and token.text.lower() not in ["be", "have", "do"]:
66
- # Attempt to correct verb form based on sentence context
67
- context = " ".join([t.text for t in doc if t.i != token.i])
68
- corrected_text.append(token.lemma_)
69
  else:
70
  corrected_text.append(token.text)
71
  else:
72
  corrected_text.append(token.text)
73
-
74
  return ' '.join(corrected_text)
75
 
76
- # Function to correct singular/plural errors (Singular/Plural Correction)
77
  def correct_singular_plural_errors(text):
78
  doc = nlp(text)
79
  corrected_text = []
80
-
81
- # Create a context dictionary for singular/plural determination
82
- context = defaultdict(int)
83
- for token in doc:
84
- if token.pos_ == "NOUN":
85
- # Track context for noun usage
86
- if token.tag_ == "NNS":
87
- context['plural'] += 1
88
- elif token.tag_ == "NN":
89
- context['singular'] += 1
90
-
91
  for token in doc:
92
  if token.pos_ == "NOUN":
93
- if token.tag_ == "NN": # Singular noun
94
- if context['plural'] > context['singular']:
95
- corrected_text.append(token.lemma_ + 's')
96
- else:
97
- corrected_text.append(token.text)
98
- elif token.tag_ == "NNS": # Plural noun
99
- if context['singular'] > context['plural']:
100
- corrected_text.append(token.lemma_)
101
- else:
102
- corrected_text.append(token.text)
103
  else:
104
  corrected_text.append(token.text)
105
  else:
106
  corrected_text.append(token.text)
107
-
108
  return ' '.join(corrected_text)
109
 
110
- # Function to check and correct article errors
111
  def correct_article_errors(text):
112
  doc = nlp(text)
113
  corrected_text = []
114
- for token in doc:
115
- if token.text in ['a', 'an']:
 
116
  next_token = token.nbor(1)
117
- if token.text == "a" and next_token.text[0].lower() in "aeiou":
 
118
  corrected_text.append("an")
119
- elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
120
  corrected_text.append("a")
121
  else:
122
  corrected_text.append(token.text)
@@ -124,32 +105,33 @@ def correct_article_errors(text):
124
  corrected_text.append(token.text)
125
  return ' '.join(corrected_text)
126
 
127
- # Paraphrasing function using SpaCy and NLTK (Humanifier)
128
  def paraphrase_with_spacy_nltk(text):
129
  doc = nlp(text)
130
  paraphrased_words = []
131
 
132
  for token in doc:
133
- # Map SpaCy POS tags to WordNet POS tags
134
  pos = None
135
- if token.pos_ == "NOUN":
136
  pos = wordnet.NOUN
137
- elif token.pos_ == "VERB":
138
  pos = wordnet.VERB
139
- elif token.pos_ == "ADJ":
140
  pos = wordnet.ADJ
141
- elif token.pos_ == "ADV":
142
  pos = wordnet.ADV
143
 
144
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
145
 
146
- # Replace with a synonym only if it makes sense
147
- if synonyms:
148
  paraphrased_words.append(synonyms[0])
149
  else:
150
  paraphrased_words.append(token.text)
151
 
152
- return ' '.join(paraphrased_words)
 
 
153
 
154
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
155
  def paraphrase_and_correct(text):
@@ -160,6 +142,8 @@ def paraphrase_and_correct(text):
160
  corrected_text = correct_article_errors(paraphrased_text)
161
  corrected_text = capitalize_sentences_and_nouns(corrected_text)
162
  corrected_text = correct_singular_plural_errors(corrected_text)
 
 
163
  final_text = correct_tense_errors(corrected_text)
164
 
165
  return final_text
 
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
 
8
 
9
  # Initialize the English text classification pipeline for AI detection
10
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
28
  # Function to get synonyms using NLTK WordNet (Humanifier)
29
  def get_synonyms_nltk(word, pos):
30
  synsets = wordnet.synsets(word, pos=pos)
31
+ if synsets:
32
+ lemmas = synsets[0].lemmas()
33
+ return [lemma.name() for lemma in lemmas]
34
+ return []
 
 
35
 
36
  # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
37
  def capitalize_sentences_and_nouns(text):
 
51
 
52
  return ' '.join(corrected_text)
53
 
54
+ # Improved Function to correct tense errors in a sentence (Tense Correction)
55
  def correct_tense_errors(text):
56
  doc = nlp(text)
57
  corrected_text = []
 
58
  for token in doc:
59
  if token.pos_ == "VERB":
60
+ lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
61
+ if token.tag_ in {"VB", "VBP"}: # Present tense verb correction
62
+ corrected_text.append(lemma)
63
+ elif token.tag_ in {"VBD", "VBN"}: # Past tense correction
64
+ corrected_text.append(lemma + "ed")
65
  else:
66
  corrected_text.append(token.text)
67
  else:
68
  corrected_text.append(token.text)
 
69
  return ' '.join(corrected_text)
70
 
71
+ # Improved Function to correct singular/plural errors (Singular/Plural Correction)
72
  def correct_singular_plural_errors(text):
73
  doc = nlp(text)
74
  corrected_text = []
75
+
 
 
 
 
 
 
 
 
 
 
76
  for token in doc:
77
  if token.pos_ == "NOUN":
78
+ if token.tag_ == "NN" and token.head.pos_ == "VERB" and token.head.tag_ == "VBZ": # Singular noun with singular verb
79
+ corrected_text.append(token.text + 's') # Make plural
80
+ elif token.tag_ == "NNS" and token.head.pos_ == "VERB" and token.head.tag_ == "VBP": # Plural noun with plural verb
81
+ corrected_text.append(token.lemma_) # Correct to singular
 
 
 
 
 
 
82
  else:
83
  corrected_text.append(token.text)
84
  else:
85
  corrected_text.append(token.text)
86
+
87
  return ' '.join(corrected_text)
88
 
89
+ # Improved Function to check and correct article errors
90
  def correct_article_errors(text):
91
  doc = nlp(text)
92
  corrected_text = []
93
+ vowels = "aeiou"
94
+ for i, token in enumerate(doc):
95
+ if token.text.lower() in ['a', 'an']:
96
  next_token = token.nbor(1)
97
+ next_word_lemma = next_token.lemma_ if next_token.lemma_ else next_token.text
98
+ if token.text == "a" and next_word_lemma[0].lower() in vowels:
99
  corrected_text.append("an")
100
+ elif token.text == "an" and next_word_lemma[0].lower() not in vowels:
101
  corrected_text.append("a")
102
  else:
103
  corrected_text.append(token.text)
 
105
  corrected_text.append(token.text)
106
  return ' '.join(corrected_text)
107
 
108
+ # Enhanced Paraphrasing function using SpaCy and NLTK (Humanifier)
109
  def paraphrase_with_spacy_nltk(text):
110
  doc = nlp(text)
111
  paraphrased_words = []
112
 
113
  for token in doc:
 
114
  pos = None
115
+ if token.pos_ in {"NOUN"}:
116
  pos = wordnet.NOUN
117
+ elif token.pos_ in {"VERB"}:
118
  pos = wordnet.VERB
119
+ elif token.pos_ in {"ADJ"}:
120
  pos = wordnet.ADJ
121
+ elif token.pos_ in {"ADV"}:
122
  pos = wordnet.ADV
123
 
124
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
125
 
126
+ # Replace with the first synonym only when one exists for a noun/verb/adjective/adverb and it differs from the original word
127
+ if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
128
  paraphrased_words.append(synonyms[0])
129
  else:
130
  paraphrased_words.append(token.text)
131
 
132
+ paraphrased_sentence = ' '.join(paraphrased_words)
133
+
134
+ return paraphrased_sentence
135
 
136
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
137
  def paraphrase_and_correct(text):
 
142
  corrected_text = correct_article_errors(paraphrased_text)
143
  corrected_text = capitalize_sentences_and_nouns(corrected_text)
144
  corrected_text = correct_singular_plural_errors(corrected_text)
145
+
146
+ # Step 3: Correct tense errors (final correction step)
147
  final_text = correct_tense_errors(corrected_text)
148
 
149
  return final_text