sashtech committed on
Commit
9079e3b
·
verified ·
1 Parent(s): 3b3046e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -78
app.py CHANGED
@@ -1,12 +1,38 @@
1
  import os
 
 
2
  import gradio as gr
3
  from transformers import pipeline
4
  import spacy
5
- import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from gector.gec_model import GecBERTModel
9
 
 
 
 
 
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
12
 
@@ -23,92 +49,18 @@ nltk.download('omw-1.4')
23
  try:
24
  nlp = spacy.load("en_core_web_sm")
25
  except OSError:
26
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
27
  nlp = spacy.load("en_core_web_sm")
28
 
29
- # Initialize GECToR model for grammar correction
30
- gector_model = GecBERTModel(vocab_path='data/output_vocabulary',
31
- model_paths=['https://grammarly-nlp-data.s3.amazonaws.com/gector/roberta_1_gector.th'],
32
- is_ensemble=False)
33
-
34
# Function to correct grammar using GECToR
def correct_grammar_with_gector(text):
    """Run the GECToR model over *text* and return the corrected string.

    ``GecBERTModel.handle_batch`` expects a batch of *token lists* and
    returns a ``(corrected_batch, total_updates)`` tuple, so the input is
    whitespace-tokenized first and the corrected tokens joined afterwards.
    """
    corrected_sentences = []
    sentences = [text]  # NOTE(review): could split into real sentences here
    for sentence in sentences:
        # handle_batch -> (list of corrected token lists, number of edits)
        preds, _ = gector_model.handle_batch([sentence.split()])
        corrected_sentences.append(' '.join(preds[0]))
    return ' '.join(corrected_sentences)
42
 
43
# Function to get synonyms using NLTK WordNet (Humanifier)
def get_synonyms_nltk(word, pos):
    """Return lemma names from the first WordNet synset matching *word*/*pos*.

    WordNet spells multi-word lemmas with underscores (e.g. ``dog_house``);
    those are converted to spaces so results can be substituted into prose.
    Returns an empty list when no synset matches.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    return [lemma.name().replace('_', ' ') for lemma in synsets[0].lemmas()]
50
-
51
# Function to capitalize the first letter of sentences and proper nouns (Humanifier)
def capitalize_sentences_and_nouns(text):
    """Capitalize sentence-initial words and proper nouns; other tokens pass through."""
    doc = nlp(text)
    rebuilt = []

    for sent in doc.sents:
        words = [
            tok.text.capitalize()
            # first token of the sentence, or a proper noun, gets capitalized
            if (tok.i == sent.start or tok.pos_ == "PROPN")
            else tok.text
            for tok in sent
        ]
        rebuilt.append(' '.join(words))

    return ' '.join(rebuilt)
68
-
69
# Paraphrasing function using SpaCy and NLTK (Humanifier)
def paraphrase_with_spacy_nltk(text):
    """Swap eligible content words for their first WordNet synonym, then
    re-capitalize sentence starts and proper nouns."""
    # Map SpaCy coarse POS tags onto WordNet POS constants.
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    doc = nlp(text)
    replaced_words = []

    for token in doc:
        wn_pos = pos_map.get(token.pos_)
        candidates = get_synonyms_nltk(token.text.lower(), wn_pos) if wn_pos else []

        # Substitute only when a synonym exists and actually differs.
        if candidates and candidates[0] != token.text.lower():
            replaced_words.append(candidates[0])
        else:
            replaced_words.append(token.text)

    # Rejoin the tokens and normalize capitalization.
    return capitalize_sentences_and_nouns(' '.join(replaced_words))
101
-
102
# Combined function: Paraphrase -> Capitalization (Humanifier)
def paraphrase_and_correct(text):
    """Paraphrase *text*, then normalize sentence/proper-noun capitalization."""
    # Step 1: synonym-based paraphrase; step 2: capitalization pass.
    paraphrased = paraphrase_with_spacy_nltk(text)
    return capitalize_sentences_and_nouns(paraphrased)
111
-
112
  # Gradio app setup with three tabs
113
  with gr.Blocks() as demo:
114
  with gr.Tab("AI Detection"):
@@ -126,7 +78,7 @@ with gr.Blocks() as demo:
126
  output_text = gr.Textbox(label="Paraphrased Text")
127
 
128
  # Connect the paraphrasing function to the button
129
- paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
130
 
131
  with gr.Tab("Grammar Correction"):
132
  grammar_input = gr.Textbox(lines=5, label="Input Text")
 
1
  import os
2
+ import subprocess
3
+ import sys
4
  import gradio as gr
5
  from transformers import pipeline
6
  import spacy
 
7
  import nltk
8
  from nltk.corpus import wordnet
9
+
10
# Function to install GECToR (clone + one-time requirements install)
def install_gector():
    """Ensure the GECToR source tree is available and importable.

    Clones grammarly/gector into ./gector on first run.  A marker file
    records a successful requirements install so pip is not re-run on
    every application startup (the original re-installed each time).
    Finally the clone directory is appended to sys.path so
    ``from gector...`` imports resolve.
    """
    if not os.path.exists('gector'):
        print("Cloning GECToR repository...")
        subprocess.run(
            ["git", "clone", "https://github.com/grammarly/gector.git"],
            check=True,
        )

    # Install dependencies from GECToR requirements, once; check=True makes
    # a failed install raise so the marker is never written spuriously.
    marker = os.path.join('gector', '.requirements_installed')
    if not os.path.exists(marker):
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-r", "gector/requirements.txt"],
            check=True,
        )
        with open(marker, 'w'):
            pass

    # Manually add GECToR to the Python path
    sys.path.append(os.path.abspath('gector'))
21
+
22
+ # Install and import GECToR
23
+ install_gector()
24
+
25
+ # Ensure the gector module path is added correctly
26
+ sys.path.insert(0, os.path.abspath('./gector'))
27
+
28
+ # Import GECToR after installation
29
  from gector.gec_model import GecBERTModel
30
 
31
+ # Initialize GECToR model for grammar correction
32
+ gector_model = GecBERTModel(vocab_path='gector/data/output_vocabulary',
33
+ model_paths=['https://grammarly-nlp-data.s3.amazonaws.com/gector/roberta_1_gector.th'],
34
+ is_ensemble=False)
35
+
36
  # Initialize the English text classification pipeline for AI detection
37
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
38
 
 
49
  try:
50
  nlp = spacy.load("en_core_web_sm")
51
  except OSError:
52
+ subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
53
  nlp = spacy.load("en_core_web_sm")
54
 
 
 
 
 
 
55
# Function to correct grammar using GECToR
def correct_grammar_with_gector(text):
    """Run the GECToR model over *text* and return the corrected string.

    ``GecBERTModel.handle_batch`` expects a batch of *token lists* and
    returns a ``(corrected_batch, total_updates)`` tuple, so the input is
    whitespace-tokenized first and the corrected tokens joined afterwards.
    """
    corrected_sentences = []
    sentences = [text]  # NOTE(review): could split into real sentences here
    for sentence in sentences:
        # handle_batch -> (list of corrected token lists, number of edits)
        preds, _ = gector_model.handle_batch([sentence.split()])
        corrected_sentences.append(' '.join(preds[0]))
    return ' '.join(corrected_sentences)
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # Gradio app setup with three tabs
65
  with gr.Blocks() as demo:
66
  with gr.Tab("AI Detection"):
 
78
  output_text = gr.Textbox(label="Paraphrased Text")
79
 
80
  # Connect the paraphrasing function to the button
81
+ paraphrase_button.click(correct_grammar_with_gector, inputs=text_input, outputs=output_text)
82
 
83
  with gr.Tab("Grammar Correction"):
84
  grammar_input = gr.Textbox(lines=5, label="Input Text")