sashtech committed on
Commit 1fb0b90 · verified · 1 Parent(s): e5235ee

Update app.py

Files changed (1)
  1. app.py +26 -45
app.py CHANGED
@@ -1,38 +1,42 @@
 import os
+import subprocess
 import gradio as gr
 from transformers import pipeline
 import spacy
-import subprocess
 import nltk
 from nltk.corpus import wordnet
 
-# Initialize the English text classification pipeline for AI detection
-pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
-
-# Ensure necessary NLTK data is downloaded for Humanifier
+# Ensure necessary NLTK data is downloaded
 nltk.download('wordnet')
 nltk.download('omw-1.4')
 
-# Ensure the SpaCy model is installed for Humanifier
+# Ensure the SpaCy model is installed
 try:
     nlp = spacy.load("en_core_web_sm")
 except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
 
-# Function to get synonyms using NLTK WordNet (Humanifier)
+# Initialize the English text classification pipeline for AI detection
+pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
+
+def predict_en(text):
+    """ Function to predict the label and score for English text (AI Detection) """
+    res = pipeline_en(text)[0]
+    return res['label'], res['score']
+
 def get_synonyms_nltk(word, pos):
+    """ Function to get synonyms using NLTK WordNet """
     synsets = wordnet.synsets(word, pos=pos)
     if synsets:
         lemmas = synsets[0].lemmas()
         return [lemma.name() for lemma in lemmas]
     return []
 
-# Function to capitalize the first letter of sentences and proper nouns (Humanifier)
 def capitalize_sentences_and_nouns(text):
+    """ Function to capitalize the first letter of sentences and proper nouns """
     doc = nlp(text)
     corrected_text = []
-
     for sent in doc.sents:
         sentence = []
         for token in sent:
@@ -43,50 +47,42 @@ def capitalize_sentences_and_nouns(text):
             else:
                 sentence.append(token.text)
         corrected_text.append(' '.join(sentence))
-
     return ' '.join(corrected_text)
 
-# Function to correct tense errors in a sentence (Tense Correction)
 def correct_tense_errors(text):
+    """ Function to correct tense errors in a sentence """
     doc = nlp(text)
     corrected_text = []
     for token in doc:
-        # Check for tense correction based on modal verbs
         if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
-            # Replace with appropriate verb form
             lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
             corrected_text.append(lemma)
         else:
            corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
-# Function to correct singular/plural errors (Singular/Plural Correction)
 def correct_singular_plural_errors(text):
+    """ Function to correct singular/plural errors """
    doc = nlp(text)
    corrected_text = []
-
    for token in doc:
        if token.pos_ == "NOUN":
-            # Check if the noun is singular or plural
            if token.tag_ == "NN": # Singular noun
-                # Look for determiners like "many", "several", "few" to correct to plural
                if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
                    corrected_text.append(token.lemma_ + 's')
                else:
                    corrected_text.append(token.text)
            elif token.tag_ == "NNS": # Plural noun
-                # Look for determiners like "a", "one" to correct to singular
                if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
                    corrected_text.append(token.lemma_)
                else:
                    corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
-
    return ' '.join(corrected_text)
 
-# Function to check and correct article errors
 def correct_article_errors(text):
+    """ Function to check and correct article errors """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
@@ -102,23 +98,10 @@ def correct_article_errors(text):
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
 
-# Function to paraphrase and correct grammar
 def paraphrase_and_correct(text):
-    # Enhance rephrasing by using more advanced SpaCy functionalities
-    paraphrased_text = capitalize_sentences_and_nouns(text) # Start by capitalizing sentences properly
-
-    # Use SpaCy to rephrase by substituting synonyms, restructuring sentences, etc.
-    doc = nlp(paraphrased_text)
-    rephrased = []
-    for token in doc:
-        synonyms = get_synonyms_nltk(token.text, pos=wordnet.VERB if token.pos_ == "VERB" else wordnet.NOUN)
-        if synonyms:
-            rephrased.append(synonyms[0])
-        else:
-            rephrased.append(token.text)
-
-    paraphrased_text = ' '.join(rephrased)
-
+    """ Function to paraphrase and correct grammar """
+    paraphrased_text = capitalize_sentences_and_nouns(text) # Capitalize first to ensure proper noun capitalization
+
    # Apply grammatical corrections
    paraphrased_text = correct_article_errors(paraphrased_text)
    paraphrased_text = correct_singular_plural_errors(paraphrased_text)
@@ -126,24 +109,22 @@ def paraphrase_and_correct(text):
 
    return paraphrased_text
 
-# Gradio app setup with two tabs
+# Setup Gradio interface
 with gr.Blocks() as demo:
    with gr.Tab("AI Detection"):
        t1 = gr.Textbox(lines=5, label='Text')
        button1 = gr.Button("🤖 Predict!")
-        label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
-        score1 = gr.Textbox(lines=1, label='Prob')
+        label1 = gr.Textbox(lines=1, label='Predicted Label')
+        score1 = gr.Textbox(lines=1, label='Score')
+
+        button1.click(predict_en, inputs=[t1], outputs=[label1, score1])
 
-        # Connect the prediction function to the button
-        button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
-
    with gr.Tab("Humanifier"):
        text_input = gr.Textbox(lines=5, label="Input Text")
        paraphrase_button = gr.Button("Paraphrase & Correct")
        output_text = gr.Textbox(label="Paraphrased Text")
 
-        # Connect the paraphrasing function to the button
-        paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
+        paraphrase_button.click(paraphrase_and_correct, inputs=[text_input], outputs=[output_text])
 
-# Launch the app with the remaining functionalities
+# Launch the app
 demo.launch()
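
For a quick local sanity check of the AI-detection path this commit wires up, the new predict_en helper can be exercised outside the Gradio UI. The snippet below is a minimal sketch, not part of the commit: it assumes transformers is installed and the Hello-SimpleAI/chatgpt-detector-roberta checkpoint can be downloaded, and the sample sentence is illustrative only.

# Standalone smoke test (illustrative; mirrors the helper defined in app.py).
from transformers import pipeline

# Same checkpoint the Space loads at startup.
pipeline_en = pipeline(task="text-classification",
                       model="Hello-SimpleAI/chatgpt-detector-roberta")

def predict_en(text):
    # Return the top prediction's label and score, as app.py does.
    res = pipeline_en(text)[0]
    return res['label'], res['score']

label, score = predict_en("This passage was generated by a large language model.")
print(label, score)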