sashtech committed
Commit d35a2d1 · verified · 1 parent: 3b5e5a8

Update app.py

Files changed (1)
  1. app.py +48 -98
app.py CHANGED
@@ -9,11 +9,6 @@ from nltk.corpus import wordnet
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
-# Function to predict the label and score for English text (AI Detection)
-def predict_en(text):
-    res = pipeline_en(text)[0]
-    return res['label'], res['score']
-
 # Ensure necessary NLTK data is downloaded for Humanifier
 nltk.download('wordnet')
 nltk.download('omw-1.4')
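The deleted predict_en helper did nothing more than unwrap the first result returned by pipeline_en, so the detector can still be sanity-checked outside Gradio with a few lines. A minimal sketch, assuming transformers is installed and using an invented sample sentence:

# Standalone check of the detector, mirroring the removed predict_en helper.
# Assumes `transformers` is installed; the sample text is invented and the model
# weights are fetched from the Hub on first use.
from transformers import pipeline

pipeline_en = pipeline(task="text-classification",
                       model="Hello-SimpleAI/chatgpt-detector-roberta")

res = pipeline_en("The quarterly report was prepared by the finance team.")[0]
print(res['label'], res['score'])  # label string plus a confidence score

Note that the button1.click(...) context line in the last hunk still references predict_en.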
@@ -25,64 +20,35 @@ except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
 
-# Function to get synonyms using NLTK WordNet (Humanifier)
-def get_synonyms_nltk(word, pos):
-    synsets = wordnet.synsets(word, pos=pos)
-    if synsets:
-        lemmas = synsets[0].lemmas()
-        return [lemma.name().replace('_', ' ') for lemma in lemmas]
-    return []
-
-# Function to capitalize the first letter of sentences and proper nouns (Humanifier)
-def capitalize_sentences_and_nouns(text):
-    doc = nlp(text)
-    corrected_text = []
-
-    for sent in doc.sents:
-        sentence = []
-        for token in sent:
-            if token.i == sent.start:  # First word of the sentence
-                sentence.append(token.text.capitalize())
-            elif token.pos_ == "PROPN":  # Proper noun
-                sentence.append(token.text.capitalize())
-            else:
-                sentence.append(token.text)
-        corrected_text.append(' '.join(sentence))
-
-    return ' '.join(corrected_text)
-
-# Function to correct tense errors in a sentence (Tense Correction)
-def correct_tense_errors(text):
-    doc = nlp(text)
+# Function to check subject-verb agreement
+def check_subject_verb_agreement(doc):
     corrected_text = []
-
     for token in doc:
-        if token.tag_ in {"VBD", "VBN"} and token.lemma_:
-            # Convert past tense verbs to their base form
-            corrected_text.append(token.lemma_)
-        else:
-            corrected_text.append(token.text)
-
+        if token.dep_ == "nsubj":  # Check if the token is a subject
+            subject = token
+            verb = token.head  # Find the associated verb
+            if verb.tag_ in {"VBZ", "VBP"}:  # Singular/plural verb forms
+                if subject.tag_ == "NNS" and verb.tag_ == "VBZ":  # Plural subject with singular verb
+                    corrected_text.append(verb.lemma_)  # Convert verb to plural form
+                elif subject.tag_ == "NN" and verb.tag_ == "VBP":  # Singular subject with plural verb
+                    corrected_text.append(verb.lemma_ + 's')  # Convert verb to singular form
+                else:
+                    corrected_text.append(verb.text)  # No correction needed
+            else:
+                corrected_text.append(verb.text)
+        corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
-# Function to correct singular/plural errors (Singular/Plural Correction)
-def correct_singular_plural_errors(text):
-    doc = nlp(text)
+# Function to correct singular/plural errors using dependency parsing
+def correct_singular_plural_errors(doc):
     corrected_text = []
 
     for token in doc:
         if token.pos_ == "NOUN":
-            if token.tag_ == "NN":  # Singular noun
-                if any(child.text.lower() in {'many', 'several', 'few', 'a', 'one'} for child in token.head.children):
-                    corrected_text.append(token.text if token.text.endswith('s') else token.text + 's')
-                else:
-                    corrected_text.append(token.text)
-            elif token.tag_ == "NNS":  # Plural noun
-                if any(child.text.lower() in {'a', 'one'} for child in token.head.children):
-                    singular = token.lemma_
-                    corrected_text.append(singular)
-                else:
-                    corrected_text.append(token.text)
+            if token.tag_ == "NN" and token.head.pos_ == "VERB" and token.head.tag_ == "VBP":
+                corrected_text.append(token.lemma_ + 's')  # Singular noun, plural verb
+            elif token.tag_ == "NNS" and token.head.pos_ == "VERB" and token.head.tag_ == "VBZ":
+                corrected_text.append(token.lemma_)  # Plural noun, singular verb
             else:
                 corrected_text.append(token.text)
         else:
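To try the new agreement heuristic on its own, the function can be copied next to a spaCy pipeline and fed a parsed Doc. A minimal sketch, assuming en_core_web_sm is installed and using an invented test sentence (the function body is copied from the hunk above):

# Exercise check_subject_verb_agreement outside the app.
# Assumes spaCy and en_core_web_sm are installed; the test sentence is invented.
import spacy

nlp = spacy.load("en_core_web_sm")

def check_subject_verb_agreement(doc):
    corrected_text = []
    for token in doc:
        if token.dep_ == "nsubj":  # Check if the token is a subject
            subject = token
            verb = token.head  # Find the associated verb
            if verb.tag_ in {"VBZ", "VBP"}:  # Singular/plural verb forms
                if subject.tag_ == "NNS" and verb.tag_ == "VBZ":  # Plural subject with singular verb
                    corrected_text.append(verb.lemma_)  # Convert verb to plural form
                elif subject.tag_ == "NN" and verb.tag_ == "VBP":  # Singular subject with plural verb
                    corrected_text.append(verb.lemma_ + 's')  # Convert verb to singular form
                else:
                    corrected_text.append(verb.text)  # No correction needed
            else:
                corrected_text.append(verb.text)
        corrected_text.append(token.text)
    return ' '.join(corrected_text)

print(check_subject_verb_agreement(nlp("The dogs barks at the mailman.")))

As written, the loop emits a (possibly corrected) verb form next to every subject and still echoes each original token, so the return value reads more like a diagnostic string than a cleanly rewritten sentence.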
@@ -90,70 +56,54 @@ def correct_singular_plural_errors(text):
 
     return ' '.join(corrected_text)
 
-# Function to check and correct article errors
-def correct_article_errors(text):
-    doc = nlp(text)
-    corrected_text = []
-    tokens = list(doc)
-
-    for i, token in enumerate(tokens):
-        if token.text.lower() in {'a', 'an'}:
-            if i + 1 < len(tokens):
-                next_token = tokens[i + 1]
-                if next_token.text[0].lower() in 'aeiou':
-                    corrected_text.append('an')
-                else:
-                    corrected_text.append('a')
-            else:
-                corrected_text.append(token.text)
-        else:
-            corrected_text.append(token.text)
-    return ' '.join(corrected_text)
-
 # Paraphrasing function using SpaCy and NLTK (Humanifier)
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
     paraphrased_words = []
 
     for token in doc:
+        # Map SpaCy POS tags to WordNet POS tags
         pos = None
-        if token.pos_ == "NOUN":
+        if token.pos_ in {"NOUN"}:
             pos = wordnet.NOUN
-        elif token.pos_ == "VERB":
+        elif token.pos_ in {"VERB"}:
             pos = wordnet.VERB
-        elif token.pos_ == "ADJ":
+        elif token.pos_ in {"ADJ"}:
             pos = wordnet.ADJ
-        elif token.pos_ == "ADV":
+        elif token.pos_ in {"ADV"}:
             pos = wordnet.ADV
 
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
 
-        # Replace with a synonym only if it's more common and fits the context
-        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:
-            # Avoid replacing with the same word or rare synonyms
-            synonym = synonyms[0]
-            if synonym != token.text.lower() and len(synonym.split()) == 1:
-                paraphrased_words.append(synonym)
-            else:
-                paraphrased_words.append(token.text)
+        # Replace with a synonym only if it makes sense
+        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
+            paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
 
-    paraphrased_sentence = ' '.join(paraphrased_words)
-    return paraphrased_sentence
+    return ' '.join(paraphrased_words)
 
 # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
 def paraphrase_and_correct(text):
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
 
-    # Step 2: Apply grammatical corrections on the paraphrased text
-    corrected_text = correct_article_errors(paraphrased_text)
+    # Step 2: Parse the text with spaCy
+    doc = nlp(paraphrased_text)
+
+    # Step 3: Apply grammatical corrections on the paraphrased text
+    corrected_text = correct_article_errors(doc)
+
     corrected_text = capitalize_sentences_and_nouns(corrected_text)
-    corrected_text = correct_singular_plural_errors(corrected_text)
-    corrected_text = correct_tense_errors(corrected_text)
 
-    return corrected_text
+    corrected_text = check_subject_verb_agreement(nlp(corrected_text))
+
+    corrected_text = correct_singular_plural_errors(nlp(corrected_text))
+
+    # Step 4: Capitalize sentences and proper nouns (final correction step)
+    final_text = correct_tense_errors(nlp(corrected_text))
+
+    return final_text
 
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
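The reworked paraphraser still calls get_synonyms_nltk, the WordNet helper that the second hunk removes, so its lookup is worth keeping in view. Below is a standalone sketch of that lookup, with the helper body restored from the deleted lines, the POS mapping condensed into a dict, and two arbitrary example words; the wordnet and omw-1.4 corpora are assumed to be downloaded, as app.py does at startup:

# Standalone view of the WordNet lookup behind paraphrase_with_spacy_nltk.
# The helper body matches the get_synonyms_nltk removed earlier in this commit;
# the corpora are assumed to be downloaded and the example words are arbitrary.
from nltk.corpus import wordnet

def get_synonyms_nltk(word, pos):
    synsets = wordnet.synsets(word, pos=pos)
    if synsets:
        lemmas = synsets[0].lemmas()
        return [lemma.name().replace('_', ' ') for lemma in lemmas]
    return []

# Equivalent of the if/elif chain that maps spaCy coarse POS tags to WordNet constants.
pos_map = {"NOUN": wordnet.NOUN, "VERB": wordnet.VERB, "ADJ": wordnet.ADJ, "ADV": wordnet.ADV}

for word, spacy_pos in [("car", "NOUN"), ("quick", "ADJ")]:
    print(word, get_synonyms_nltk(word, pos_map[spacy_pos]))

Because the hunk substitutes only synonyms[0], and the first lemma of a word's first synset is frequently the word itself, many tokens will pass through unchanged.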
@@ -161,16 +111,16 @@ with gr.Blocks() as demo:
         t1 = gr.Textbox(lines=5, label='Text')
         button1 = gr.Button("🤖 Predict!")
         label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
-        score1 = gr.Textbox(lines=1, label='Probability')
-
+        score1 = gr.Textbox(lines=1, label='Prob')
+
         # Connect the prediction function to the button
         button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
 
     with gr.Tab("Humanifier"):
-        text_input = gr.Textbox(lines=10, label="Input Text")
+        text_input = gr.Textbox(lines=5, label="Input Text")
         paraphrase_button = gr.Button("Paraphrase & Correct")
         output_text = gr.Textbox(label="Paraphrased Text")
-
+
         # Connect the paraphrasing function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 
 
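All of the correction heuristics in this commit branch on Penn Treebank tags (NN/NNS for nouns, VBZ/VBP/VBD/VBN for verbs) and on each token's dependency head. A quick probe of those fields, again assuming en_core_web_sm and using an invented sentence, shows exactly what the comparisons see:

# Print the token attributes the correction heuristics compare against.
# Assumes spaCy with en_core_web_sm; the sentence is an invented example.
import spacy

nlp = spacy.load("en_core_web_sm")

for token in nlp("The dogs barks loudly at the mailman."):
    print(f"{token.text:<10} pos={token.pos_:<6} tag={token.tag_:<5} "
          f"dep={token.dep_:<6} head={token.head.text}")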