sashtech committed on
Commit
73bd89d
·
verified ·
1 Parent(s): b03c66d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -60
app.py CHANGED
@@ -5,16 +5,10 @@ import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
- from pattern.en import conjugate, tenses
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
12
 
13
- # Function to predict the label and score for English text (AI Detection)
14
- def predict_en(text):
15
- res = pipeline_en(text)[0]
16
- return res['label'], res['score']
17
-
18
  # Ensure necessary NLTK data is downloaded for Humanifier
19
  nltk.download('wordnet')
20
  nltk.download('omw-1.4')
@@ -52,59 +46,54 @@ def capitalize_sentences_and_nouns(text):
52
 
53
  return ' '.join(corrected_text)
54
 
55
- # Function to check and correct conjunction errors with 'because' and 'but'
56
- def check_conjunction_errors(text):
57
- # Replace misplaced 'because' and 'but'
58
- text = text.replace("because, ", "because ")
59
- text = text.replace("but, ", "but ")
60
- return text
61
-
62
- # Function to check and correct tense consistency in sentences using Pattern.en
63
- def check_tense_consistency(text):
64
  doc = nlp(text)
65
- corrected_sentences = []
66
-
67
- for sent in doc.sents:
68
- verbs = [token for token in sent if token.pos_ == 'VERB']
69
- if verbs:
70
- # Find the most common tense in the sentence
71
- common_tense = None
72
- for verb in verbs:
73
- verb_tense = tenses(verb.text)
74
- if verb_tense:
75
- common_tense = verb_tense[0][0]
76
- break
77
-
78
- # Conjugate all verbs to the common tense if there's inconsistency
79
- corrected_sentence = []
80
- for token in sent:
81
- if token.pos_ == 'VERB' and common_tense:
82
- corrected_verb = conjugate(token.text, tense=common_tense)
83
- corrected_sentence.append(corrected_verb)
84
- else:
85
- corrected_sentence.append(token.text)
86
- corrected_sentences.append(' '.join(corrected_sentence))
87
  else:
88
- corrected_sentences.append(sent.text)
89
-
90
- return ' '.join(corrected_sentences)
91
 
92
- # Function to check and correct article usage ('a', 'an', 'the')
93
- def check_article_usage(text):
94
  doc = nlp(text)
95
  corrected_text = []
96
-
97
  for token in doc:
98
- if token.text.lower() in ['a', 'an', 'the']:
99
- if token.text.lower() == 'a' and token.head.pos_ in ['NOUN', 'ADJ'] and token.head.text[0] in 'aeiou':
100
- corrected_text.append('an')
101
- elif token.text.lower() == 'an' and token.head.pos_ in ['NOUN', 'ADJ'] and token.head.text[0] not in 'aeiou':
102
- corrected_text.append('a')
 
 
 
103
  else:
104
  corrected_text.append(token.text)
105
  else:
106
  corrected_text.append(token.text)
 
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  return ' '.join(corrected_text)
109
 
110
  # Paraphrasing function using SpaCy and NLTK (Humanifier)
@@ -140,22 +129,18 @@ def paraphrase_with_spacy_nltk(text):
140
 
141
  return corrected_text
142
 
143
- # Combined function: Paraphrase -> Grammar Correction -> Capitalization -> Tense Consistency (Humanifier)
144
  def paraphrase_and_correct(text):
145
  # Step 1: Paraphrase the text
146
  paraphrased_text = paraphrase_with_spacy_nltk(text)
147
 
148
- # Step 2: Check and correct conjunction errors
149
- corrected_conjunctions = check_conjunction_errors(paraphrased_text)
150
-
151
- # Step 3: Check and correct article usage
152
- corrected_articles = check_article_usage(corrected_conjunctions)
153
-
154
- # Step 4: Capitalize sentences and proper nouns
155
- capitalized_text = capitalize_sentences_and_nouns(corrected_articles)
156
 
157
- # Step 5: Check and correct tense consistency
158
- final_text = check_tense_consistency(capitalized_text)
159
 
160
  return final_text
161
 
@@ -175,7 +160,7 @@ with gr.Blocks() as demo:
175
  paraphrase_button = gr.Button("Paraphrase & Correct")
176
  output_text = gr.Textbox(label="Paraphrased Text")
177
 
178
- # Connect the paraphrasing and correction function to the button
179
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
180
 
181
  # Launch the app with the remaining functionalities
 
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
 
8
 
9
  # Initialize the English text classification pipeline for AI detection
10
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
11
 
 
 
 
 
 
12
  # Ensure necessary NLTK data is downloaded for Humanifier
13
  nltk.download('wordnet')
14
  nltk.download('omw-1.4')
 
46
 
47
  return ' '.join(corrected_text)
48
 
49
def correct_tense_errors(text):
    """Replace auxiliary-dependent verb tokens with their WordNet base form.

    The text is parsed with the module-level spaCy pipeline ``nlp``. A token
    is rewritten only when its coarse POS is ``VERB`` and its dependency
    label is ``aux`` or ``auxpass``; the replacement is
    ``wordnet.morphy(token.text, wordnet.VERB)``, falling back to the
    original token text when WordNet returns nothing. Every other token is
    passed through unchanged.

    Args:
        text: The string to process.

    Returns:
        The token texts joined with single spaces. Original spacing is not
        preserved (punctuation ends up separated from the preceding word).
    """
    # NOTE(review): pipelines that tag auxiliaries as AUX rather than VERB
    # would never satisfy this condition -- confirm against the loaded model.
    auxiliary_deps = {"aux", "auxpass"}

    def _rewrite(tok):
        # Leave anything that is not a VERB in an auxiliary role untouched.
        if tok.pos_ != "VERB" or tok.dep_ not in auxiliary_deps:
            return tok.text
        # morphy() returns None for unknown forms; keep the surface text then.
        return wordnet.morphy(tok.text, wordnet.VERB) or tok.text

    return ' '.join(_rewrite(tok) for tok in nlp(text))
 
62
 
63
def _pluralize_noun(noun):
    """Return a regular English plural of *noun* using suffix rules only."""
    lowered = noun.lower()
    if lowered.endswith(("s", "x", "z", "ch", "sh")):
        return noun + "es"
    if len(lowered) > 1 and lowered.endswith("y") and lowered[-2] not in "aeiou":
        return noun[:-1] + "ies"
    return noun + "s"


def correct_singular_plural_errors(text):
    """Make subject nouns agree in number with their own numeric modifier.

    The text is parsed with the module-level spaCy pipeline ``nlp``. Only
    tokens that are ``NOUN`` subjects (``dep_ == "nsubj"``) and that have a
    ``nummod`` child are candidates for change:

    * a singular noun (``NN``) whose numeral is not "one"/"1" is pluralised;
    * a plural noun (``NNS``) whose numeral is exactly "one"/"1" is replaced
      by its lemma.

    Nouns without a numeral are left untouched, so an ordinary plural
    subject such as "students" is never singularised.

    Args:
        text: The string to process.

    Returns:
        The token texts joined with single spaces. Original spacing is not
        preserved.
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        replacement = token.text
        if token.pos_ == "NOUN" and token.dep_ == "nsubj":
            # The numeral is looked up on the noun itself, not on the verb
            # that governs the noun.
            numerals = [child for child in token.children if child.dep_ == "nummod"]
            if numerals:
                # presumably the last nummod is the one adjacent to the noun
                numeral = numerals[-1]
                # A bare "one"/"1" means singular; a numeral with its own
                # dependents (e.g. part of a compound number) does not.
                is_one = (
                    numeral.lower_ in ("one", "1")
                    and numeral.n_lefts + numeral.n_rights == 0
                )
                if token.tag_ == "NN" and not is_one:
                    replacement = _pluralize_noun(token.text)
                elif token.tag_ == "NNS" and is_one:
                    replacement = token.lemma_
        corrected_text.append(replacement)
    return ' '.join(corrected_text)
81
 
82
def correct_article_errors(text):
    """Swap lowercase 'a' / 'an' according to the first letter of the next token.

    The text is tokenised with the module-level spaCy pipeline ``nlp``.
    A lowercase "a" followed by a token starting with a vowel letter becomes
    "an"; a lowercase "an" followed by a token starting with any other letter
    becomes "a". The article is left unchanged when it is the last token or
    when the next token does not start with a letter (digits, punctuation,
    whitespace), because the letter test says nothing about those cases.

    Args:
        text: The string to process.

    Returns:
        The token texts joined with single spaces. Original spacing is not
        preserved.
    """
    # NOTE: the choice is spelling-based, not sound-based, so words such as
    # "hour" or "university" are still handled by their first letter only.
    doc = nlp(text)
    last_index = len(doc) - 1
    corrected_text = []
    for token in doc:
        replacement = token.text
        # An article at the very end has no following token to inspect.
        if token.text in ("a", "an") and token.i < last_index:
            first_char = doc[token.i + 1].text[0]
            if first_char.isalpha():
                starts_with_vowel = first_char.lower() in "aeiou"
                if token.text == "a" and starts_with_vowel:
                    replacement = "an"
                elif token.text == "an" and not starts_with_vowel:
                    replacement = "a"
        corrected_text.append(replacement)
    return ' '.join(corrected_text)
98
 
99
  # Paraphrasing function using SpaCy and NLTK (Humanifier)
 
129
 
130
  return corrected_text
131
 
132
def paraphrase_and_correct(text):
    """Paraphrase *text*, apply the grammar fixes, then capitalise (Humanifier).

    The input is first passed to ``paraphrase_with_spacy_nltk``. The result
    is then fed, in this order, through ``correct_article_errors``,
    ``correct_tense_errors``, ``correct_singular_plural_errors`` and finally
    ``capitalize_sentences_and_nouns``.

    Args:
        text: The string to process.

    Returns:
        The output of the last step in the chain.
    """
    # Step 1: paraphrase the text.
    result = paraphrase_with_spacy_nltk(text)

    # Steps 2-3: grammatical corrections first, capitalisation last.
    correction_steps = (
        correct_article_errors,
        correct_tense_errors,
        correct_singular_plural_errors,
        capitalize_sentences_and_nouns,
    )
    for step in correction_steps:
        result = step(result)

    return result
146
 
 
160
  paraphrase_button = gr.Button("Paraphrase & Correct")
161
  output_text = gr.Textbox(label="Paraphrased Text")
162
 
163
+ # Connect the paraphrasing function to the button
164
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
165
 
166
  # Launch the app with the remaining functionalities