SamitF committed on
Commit
fa9f85e
·
verified ·
1 Parent(s): d0288b3

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +47 -0
  2. corrector.py +13 -0
  3. examples.txt +15 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from corrector import GrammarCorrector
3
+ import difflib
4
+
5
+ corrector = GrammarCorrector()
6
+
7
def highlight_diffs(original, corrected):
    """Render a word-level diff of two sentences as a Markdown string.

    Both inputs are split on whitespace and compared with ``difflib.ndiff``.
    Words only in ``original`` are wrapped in ``~~...~~``, words only in
    ``corrected`` are wrapped in ``**...**``, and shared words pass through
    unchanged. The pieces are joined with single spaces.
    """
    # Map the first character of each ndiff entry to its Markdown wrapper.
    # Entries with any other prefix (ndiff's "? " hint lines) are skipped.
    templates = {"-": "~~{}~~", "+": "**{}**", " ": "{}"}
    pieces = []
    for entry in difflib.ndiff(original.split(), corrected.split()):
        template = templates.get(entry[:1])
        if template is not None:
            # entry[2:] drops the two-character ndiff prefix.
            pieces.append(template.format(entry[2:]))
    return " ".join(pieces)
18
+
19
def fix_sentence(text):
    """Correct a sentence and build a highlighted view of the changes.

    Args:
        text: The sentence entered by the user.

    Returns:
        A ``(corrected, highlighted)`` tuple: the corrected sentence and a
        Markdown string marking the word-level differences from ``text``.
        Blank input (``None``, empty, or whitespace-only) yields ``("", "")``.
    """
    # Don't run the model on blank input: it would generate text from the
    # bare task prefix and present it as a "correction".
    if not text or not text.strip():
        return "", ""
    corrected = corrector.correct(text)
    highlighted = highlight_diffs(text, corrected)
    return corrected, highlighted
23
+
24
# Sample sentences offered as one-click examples in the UI.
example_sentences = [
    "She go to school every day.",
    "I can has cheeseburger?",
    "The cat sleeped on the mat.",
    "We was going to the park yesterday.",
    "This is teh best day of my life!",
    "He no went to office today.",
    "Their coming too the party.",
    "I hopes you gets better soon.",
    "Where is you going now?",
    "He do not likes pizza.",
]

# One text input, two outputs: the corrected sentence as plain text and the
# word-level changes rendered as Markdown.
demo = gr.Interface(
    fn=fix_sentence,
    inputs=gr.Textbox(lines=2, label="Input Sentence"),
    outputs=[
        gr.Textbox(label="Corrected Sentence"),
        gr.Markdown(label="Changes Highlighted"),
    ],
    title="Context-Aware Grammar & Spell Checker",
    description="Fixes grammar and spelling using a T5-based model.",
    # Each example is a one-element row because the interface has one input.
    examples=[[sentence] for sentence in example_sentences],
)
demo.launch()
47
+
corrector.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+
3
class GrammarCorrector:
    """Grammar and spelling corrector backed by a seq2seq model.

    The tokenizer and model are loaded once, at construction time, with
    ``AutoTokenizer.from_pretrained`` and
    ``AutoModelForSeq2SeqLM.from_pretrained``.
    """

    def __init__(self, model_name="vennify/t5-base-grammar-correction",
                 task_prefix="grammar: "):
        """Load the tokenizer and model.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.
            task_prefix: Text prepended to every input before tokenization.
                The default is the prefix used on the model card of the
                default checkpoint; pass another value (for example
                ``"gec: "``) for a checkpoint trained with a different one.
        """
        self.task_prefix = task_prefix
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    def correct(self, text):
        """Return the corrected version of ``text``.

        Args:
            text: The sentence to correct.

        Returns:
            The decoded model output with special tokens removed, or ``""``
            when ``text`` is ``None``, empty, or whitespace-only.
        """
        # With nothing to correct, skip generation; otherwise the model
        # would produce output from the task prefix alone.
        if not text or not text.strip():
            return ""
        prompt = self.task_prefix + text
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
        # Beam search over 5 beams; output is capped at 128 tokens.
        outputs = self.model.generate(
            input_ids, max_length=128, num_beams=5, early_stopping=True
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
examples.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ She go to school every day.
2
+ I can has cheeseburger?
3
+ The cat sleeped on the mat.
4
+ We was going to the park yesterday.
5
+ This is teh best day of my life!
6
+ He no went to office today.
7
+ Their coming too the party.
8
+ I hopes you gets better soon.
9
+ Where is you going now?
10
+ He do not likes pizza.
11
+ They was happy with there results.
12
+ This are the answers to your questions.
13
+ My mom sayed I can go.
14
+ Your the best friend I ever had.
15
+ He eated all the cake himself.
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ gradio
4
+ sentencepiece