englissi committed on
Commit
2b71923
·
verified ·
1 Parent(s): d4e6b5c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
3
+ import gradio as gr
4
+ from nltk.tokenize import sent_tokenize
5
+ from difflib import SequenceMatcher
6
+
7
# Run the companion download script before anything else. Per the original
# comment it is meant to fetch the NLTK data needed by sent_tokenize; the
# script itself is not visible here. Its exit status is not checked.
os.system("python download.py")

# Hugging Face checkpoint id of the T5 model fine-tuned for grammar
# correction; the tokenizer and the model are loaded from the same id.
GRAMMAR_MODEL_ID = "prithivida/grammar_error_correcter_v1"

tokenizer = T5Tokenizer.from_pretrained(GRAMMAR_MODEL_ID)
model = T5ForConditionalGeneration.from_pretrained(GRAMMAR_MODEL_ID)
13
+
14
+ # Function to perform grammar correction
15
def _underline_and_color_revisions(original, corrected):
    """Return *corrected* as an HTML fragment with revised words highlighted.

    The two sentences are split on whitespace and compared word by word with
    ``difflib.SequenceMatcher``. Words that were inserted or replaced are
    wrapped in ``<u style='color:red;'>...</u>``; unchanged words are emitted
    plainly. Words that were only deleted produce no output.

    Every piece of text is HTML-escaped before being emitted, because both
    the user's input and the model's output end up in an HTML component and
    must not be able to inject markup.

    Args:
        original: The sentence as written by the user.
        corrected: The sentence as returned by the correction model.

    Returns:
        A string of HTML with the pieces joined by single spaces.
    """
    import html  # local import: keeps this block self-contained

    # Split once up front instead of re-splitting for every opcode.
    original_words = original.split()
    corrected_words = corrected.split()
    matcher = SequenceMatcher(None, original_words, corrected_words)

    pieces = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag in ("insert", "replace"):
            revised = html.escape(" ".join(corrected_words[j1:j2]))
            pieces.append(f"<u style='color:red;'>{revised}</u>")
        elif tag == "equal":
            pieces.append(html.escape(" ".join(original_words[i1:i2])))
        # "delete": the words are absent from the corrected sentence, so
        # nothing is shown for them.
    return " ".join(pieces)


def grammar_check(text):
    """Correct the grammar of *text* and return the result as HTML.

    The text is split into sentences with ``sent_tokenize``. Each sentence is
    prefixed with ``"gec: "``, encoded with the module-level ``tokenizer``,
    run through the module-level ``model`` using beam search, and decoded.
    Each corrected sentence is then diffed against its original so that the
    revised words are underlined in red.

    Args:
        text: The raw text entered by the user.

    Returns:
        An HTML string containing the corrected sentences joined by spaces,
        or an empty string when *text* is ``None``, empty, or only whitespace.
    """
    # Nothing to correct; also avoids handing None to the sentence splitter.
    if not text or not text.strip():
        return ""

    sentences = sent_tokenize(text)
    corrected_sentences = []

    for sentence in sentences:
        input_ids = tokenizer.encode(
            f"gec: {sentence}",
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        outputs = model.generate(
            input_ids,
            max_length=512,
            num_beams=4,
            early_stopping=True,
        )
        corrected_sentences.append(
            tokenizer.decode(outputs[0], skip_special_tokens=True)
        )

    return " ".join(
        _underline_and_color_revisions(orig, corr)
        for orig, corr in zip(sentences, corrected_sentences)
    )
44
+
45
# Text shown with the interface: usage instructions, the writing prompt, and
# a sample student answer. The grammar mistakes in the sample answer are
# intentional -- it is meant to be pasted into the checker as a demo -- so
# the wording must be left exactly as it is.
prompt_description = "".join(
    [
        "Enter text to check for grammar mistakes.\n\n",
        "Writing Prompt:\n",
        "In the story, Alex and his friends discovered an ancient treasure in Whispering Hollow and decided to donate the artifacts to the local museum.\n\n",
        "In the past, did you have a similar experience where you found something valuable or interesting? Tell the story. Describe what you found, what you did with it, and how you felt about your decision.\n\n",
        "Remember to use past tense in your writing.\n\n",
        "<b>A student's sample answer:</b>\n",
        "<blockquote>When I was 10, I find an old coin in my backyard. I kept it for a while and shows it to my friends. They was impressed and say it might be valuable. Later, I take it to a local antique shop, and the owner told me it was very old. I decided to give it to the museum in my town. The museum was happy and put it on display. I feel proud of my decision.<br><br><i>Copy and paste to try.</i></blockquote>",
    ]
)

# Build the Gradio app: one text input, passed to grammar_check, with the
# returned string shown in an HTML output component.
interface = gr.Interface(
    fn=grammar_check,
    inputs="text",
    outputs="html",
    title="Grammar Checker",
    description=prompt_description,
)

# Start serving the interface as soon as the module is executed.
interface.launch()