sashdev committed on
Commit
8ed780d
·
verified ·
1 Parent(s): d123de4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -136
app.py CHANGED
@@ -1,139 +1,36 @@
1
- import os
2
  import gradio as gr
3
- from transformers import pipeline
4
- import spacy
5
- import subprocess
6
- import nltk
7
- from nltk.corpus import wordnet
8
- from nltk.corpus import stopwords
9
- from nltk.tokenize import word_tokenize
10
- from spellchecker import SpellChecker
11
- import re
12
- import string
13
- import random
14
-
15
# Download the NLTK corpora this app relies on (tokenizers, POS tagger,
# WordNet).  Done once at import time.
for _resource in (
    'punkt',
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'omw-1.4',
):
    nltk.download(_resource)

# English stopwords (available for filtering; not read by the functions below).
stop_words = set(stopwords.words("english"))

# POS tags and words that must never be replaced with synonyms
# (pronouns, auxiliaries, determiners, conjunctions, ...) — swapping
# these destroys grammaticality.
exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}

# English text-classification pipeline for AI-generated-text detection.
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Spell checker instance (initialized here; unused by the functions below).
spell = SpellChecker()

# Load the SpaCy model, installing it on first use if missing.
# NOTE: the original code loaded the model a second time and re-defined
# exclude_tags/exclude_words; those duplicates are removed here.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")
50
-
51
def get_synonyms(word):
    """Return the set of single-word WordNet synonyms for *word*.

    Multi-word lemmas (containing underscores), non-alphabetic lemmas,
    and the word itself (compared case-insensitively) are excluded.
    """
    target = word.lower()
    candidates = set()
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            name = lemma.name()
            if "_" in name or not name.isalpha():
                continue
            if name.lower() == target:
                continue
            candidates.add(name)
    return candidates
59
-
60
def replace_with_synonyms(word, pos_tag):
    """Return a random synonym of *word* with the same POS tag, or *word*.

    Candidates come from get_synonyms(); each is POS-tagged in isolation
    and kept only when its tag matches *pos_tag*.  When no candidate
    survives the filter, the original word is returned unchanged.
    """
    same_pos = []
    for candidate in get_synonyms(word):
        if nltk.pos_tag([candidate])[0][1] == pos_tag:
            same_pos.append(candidate)
    if not same_pos:
        return word
    return random.choice(same_pos)
68
-
69
def improve_paraphrasing_and_grammar(text):
    """Paraphrase *text* via synonym substitution, then clean up grammar.

    Tokens whose POS tag or surface form is protected (auxiliaries,
    pronouns, punctuation, ...) are copied through unchanged; every
    other token may be swapped for a random same-POS synonym.  The
    rebuilt text is then run through the possessive, punctuation,
    capitalisation and article fixers, in that order.
    """
    doc = nlp(text)
    rebuilt_sentences = []

    for sent in doc.sents:
        words = []
        for token in sent:
            replaceable = (
                token.tag_ not in exclude_tags
                and token.text.lower() not in exclude_words
                and token.text not in string.punctuation
            )
            if replaceable:
                replacement = replace_with_synonyms(token.text, token.tag_)
                words.append(replacement if replacement else token.text)
            else:
                words.append(token.text)
        rebuilt_sentences.append(' '.join(words))

    # Post-processing passes restore surface-level grammar.
    result = ' '.join(rebuilt_sentences)
    result = fix_possessives(result)
    result = fix_punctuation_spacing(result)
    result = capitalize_sentences(result)
    result = fix_article_errors(result)
    return result
94
-
95
def fix_punctuation_spacing(text):
    """Remove any whitespace that precedes ',', '.', '!' or '?'."""
    return re.sub(r'\s+([,.!?])', r'\1', text)
99
-
100
def fix_possessives(text):
    """Collapse spaced-out possessives, e.g. "John ' s" -> "John's".

    Generalized from the original pattern: any amount of whitespace
    around the apostrophe is absorbed (the old `\\s?` handled at most
    one space on each side), while zero-space input is still a no-op.
    """
    return re.sub(r"(\w)\s*'\s*s", r"\1's", text)
103
-
104
def capitalize_sentences(text):
    """Upper-case the first letter of each sentence in *text*.

    Fixes two defects in the original implementation:
    * sentences were re-joined with '. ', duplicating the terminal
      punctuation that the lookbehind split already kept with each
      sentence ("hello. world." became "Hello.. World."); sentences
      are now re-joined with a single space;
    * str.capitalize() lower-cased the remainder of each sentence,
      destroying proper nouns and acronyms; only the first character
      is upper-cased now.
    """
    sentences = re.split(r'(?<=\w[.!?])\s+', text)
    # Guard s[0] against empty fragments (e.g. empty input).
    fixed = [s[0].upper() + s[1:] if s else s for s in sentences]
    return ' '.join(fixed)
107
-
108
def fix_article_errors(text):
    """Correct 'a'/'an' usage based on the next word's first letter.

    Heuristic only: it inspects the letter, not the sound, so cases
    like "an hour" / "a university" are still mishandled.

    Fixes a crash in the original: token.nbor(1) raised IndexError
    when 'a'/'an' was the final token of the text; such a trailing
    article is now passed through unchanged.
    """
    doc = nlp(text)
    corrected = []
    for token in doc:
        # Only rewrite when a following token actually exists.
        if token.text in ('a', 'an') and token.i + 1 < len(doc):
            next_token = doc[token.i + 1]
            if token.text == "a" and next_token.text[0].lower() in "aeiou":
                corrected.append("an")
            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
                corrected.append("a")
            else:
                corrected.append(token.text)
        else:
            corrected.append(token.text)
    return ' '.join(corrected)
124
-
125
# Adapter between the Gradio UI callback signature and the pipeline.
def gradio_interface(text):
    """Run the full paraphrase-and-correct pipeline on *text*."""
    corrected = improve_paraphrasing_and_grammar(text)
    return corrected
129
-
130
# Build the Gradio Blocks UI: one text box in, one read-only text box
# out, wired to the paraphrase-and-correct pipeline by a single button.
with gr.Blocks() as demo:
    gr.Markdown("## Text Paraphrasing and Grammar Correction")
    input_box = gr.Textbox(lines=10, label='Enter text for paraphrasing and grammar correction')
    output_box = gr.Textbox(lines=10, label='Corrected Text', interactive=False)
    run_button = gr.Button("🔄 Paraphrase and Correct")

    run_button.click(fn=gradio_interface, inputs=input_box, outputs=output_box)

# Launch the Gradio app with a public share link.
demo.launch(share=True)
 
 
 
 
1
  import gradio as gr
2
+ from gector.gec_model import GecBERTModel
3
+
4
# Load the GECToR model.
def load_model():
    """Return a GecBERTModel configured for this app.

    The model weights are read from disk only on the first call; the
    constructed instance is cached on the function object, so repeated
    calls (one per UI request in correct_grammar) no longer repeat the
    expensive initialisation.
    """
    cached = getattr(load_model, "_model", None)
    if cached is None:
        cached = GecBERTModel(
            vocab_path='data/output_vocabulary',
            model_paths=['data/model_files/xlnet_0_gector.th'],
            max_len=128, min_len=3
        )
        load_model._model = cached
    return cached
12
+
13
# Function to correct grammar using the GECToR model.
def correct_grammar(text):
    """Run *text* through GECToR and return the corrected result.

    The original re-initialised the model on every call (its own
    comment admitted as much); the instance is now created once and
    memoised on the function object.

    NOTE(review): GECToR's handle_batch typically returns a tuple of
    (corrected_batch, total_corrections) — confirm that indexing [0]
    yields the corrected text here and not the whole batch list.
    """
    model = getattr(correct_grammar, "_model", None)
    if model is None:
        model = load_model()
        correct_grammar._model = model
    corrected_text = model.handle_batch([text])
    return corrected_text[0]  # single-item batch: return its first element
20
+
21
# Define the Gradio interface.
def create_gradio_interface():
    """Build and return the Gradio Interface wrapping correct_grammar."""
    return gr.Interface(
        fn=correct_grammar,        # callback invoked on submit
        inputs="text",             # plain-text input widget
        outputs="text",            # plain-text output widget
        title="Grammar Correction App using GECToR",
        description="Enter your text, and this app will correct its grammar using GECToR.",
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
# Launch the Gradio app
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch()