sashdev committed on
Commit
9fc880b
·
verified ·
1 Parent(s): 48d050c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -119
app.py CHANGED
@@ -1,121 +1,33 @@
1
- import os
2
  import gradio as gr
3
- from transformers import pipeline
4
- import spacy
5
- import subprocess
6
- import nltk
7
- from nltk.corpus import wordnet
8
- from nltk.corpus import stopwords
9
- from nltk.tokenize import word_tokenize
10
- from spellchecker import SpellChecker
11
- import re
12
- import string
13
- import random
14
-
15
- # Download necessary NLTK data
16
- nltk.download('punkt')
17
- nltk.download('stopwords')
18
- nltk.download('averaged_perceptron_tagger')
19
- nltk.download('averaged_perceptron_tagger_eng')
20
- nltk.download('wordnet')
21
- nltk.download('omw-1.4')
22
- nltk.download('punkt_tab')
23
-
24
- # Initialize stopwords
25
- stop_words = set(stopwords.words("english"))
26
-
27
- # Words we don't want to replace
28
- exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
29
- exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
30
-
31
- # Initialize the English text classification pipeline for AI detection
32
- pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
33
-
34
- # Initialize the spell checker
35
- spell = SpellChecker()
36
-
37
- # Ensure the SpaCy model is installed
38
- try:
39
- nlp = spacy.load("en_core_web_sm")
40
- except OSError:
41
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
42
- nlp = spacy.load("en_core_web_sm")
43
-
44
- def plagiarism_removal(text):
45
- def plagiarism_remover(word):
46
- if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
47
- return word
48
-
49
- # Find synonyms
50
- synonyms = set()
51
- for syn in wordnet.synsets(word):
52
- for lemma in syn.lemmas():
53
- if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
54
- synonyms.add(lemma.name())
55
-
56
- pos_tag_word = nltk.pos_tag([word])[0]
57
-
58
- if pos_tag_word[1] in exclude_tags:
59
- return word
60
-
61
- filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
62
-
63
- if not filtered_synonyms:
64
- return word
65
-
66
- synonym_choice = random.choice(filtered_synonyms)
67
-
68
- if word.istitle():
69
- return synonym_choice.title()
70
- return synonym_choice
71
-
72
- para_split = word_tokenize(text)
73
- final_text = [plagiarism_remover(word) for word in para_split]
74
 
75
- corrected_text = []
76
- for i in range(len(final_text)):
77
- if final_text[i] in string.punctuation and i > 0:
78
- corrected_text[-1] += final_text[i]
79
- else:
80
- corrected_text.append(final_text[i])
81
-
82
- return " ".join(corrected_text)
83
-
84
- def paraphrase_and_correct(text):
85
- paragraphs = text.split("\n\n") # Split by paragraphs
86
-
87
- # Process each paragraph separately
88
- processed_paragraphs = []
89
- for paragraph in paragraphs:
90
- cleaned_text = remove_redundant_words(paragraph)
91
- plag_removed = plagiarism_removal(cleaned_text)
92
- paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
93
- paraphrased_text = force_first_letter_capital(paraphrased_text)
94
- paraphrased_text = correct_article_errors(paraphrased_text)
95
- paraphrased_text = correct_tense_errors(paraphrased_text)
96
- paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
97
- paraphrased_text = fix_possessives(paraphrased_text)
98
- paraphrased_text = correct_spelling(paraphrased_text)
99
- paraphrased_text = fix_punctuation_spacing(paraphrased_text)
100
- processed_paragraphs.append(paraphrased_text)
101
-
102
- return "\n\n".join(processed_paragraphs) # Reassemble the text with paragraphs
103
-
104
- # Gradio app setup
105
- with gr.Blocks() as demo:
106
- with gr.Tab("AI Detection"):
107
- t1 = gr.Textbox(lines=5, label='Text')
108
- button1 = gr.Button("🤖 Predict!")
109
- label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
110
- score1 = gr.Textbox(lines=1, label='Prob')
111
-
112
- button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
113
-
114
- with gr.Tab("Paraphrasing & Grammar Correction"):
115
- t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
116
- button2 = gr.Button("🔄 Paraphrase and Correct")
117
- result2 = gr.Textbox(lines=5, label='Corrected Text')
118
-
119
- button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
120
-
121
- demo.launch(share=True)
 
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
+ # Load the grammar correction model
6
+ model_name = "microsoft/deberta-v3-base"
7
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
+
10
+ # Function to correct grammar
11
+ def correct_grammar(text):
12
+ # Encode input text
13
+ inputs = tokenizer.encode(text, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Generate the corrected text
16
+ with torch.no_grad():
17
+ outputs = model.generate(inputs, max_length=512, num_beams=5, early_stopping=True)
18
+
19
+ # Decode the corrected text
20
+ corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
21
+ return corrected_text
22
+
23
+ # Gradio Interface
24
+ interface = gr.Interface(
25
+ fn=correct_grammar,
26
+ inputs="text",
27
+ outputs="text",
28
+ title="Grammar Correction",
29
+ description="Enter a sentence or paragraph to receive grammar corrections using DeBERTa."
30
+ )
31
+
32
+ if __name__ == "__main__":
33
+ interface.launch()