energybubu committed on
Commit
97aced8
·
1 Parent(s): f00bbd2
Files changed (3) hide show
  1. README copy.md +12 -0
  2. app.py +135 -0
  3. requirements.txt +1 -0
README copy.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Grammar Corrector
3
+ emoji: 🏢
4
+ colorFrom: pink
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.39.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import requests
import os
import subprocess

# Run the spaCy "en" model download in a child process at start-up, before
# errant.load('en') is called below.
subprocess.run(["python3", "-m", "spacy", "download", "en"])

# Sample request payload; not referenced by the functions visible in this file.
data = {"sentences":["I am an good boy.", "I wanted to going to supermarket."]}

# Base URL of the LLM server, read from the `url` environment variable.
# os.environ[...] raises KeyError when the variable is not set.
llama2_url = os.environ['url']

# UJ
import json
import errant
from tqdm import tqdm
import random
from difflib import Differ

# Shared ERRANT annotator used by comp().
annotator = errant.load('en')
17
+
18
+
19
# Error category tags that are combined with the M/R/U operation prefixes
# below to build the highlight colour map.
error_categories = [
    'ADJ',
    'ADJ:FORM',
    'ADV',
    'CONJ',
    'CONTR',
    'DET',
    'MORPH',
    'NOUN',
    'NOUN:INFL',
    'NOUN:NUM',
    'NOUN:POSS',
    'ORTH',
    'OTHER',
    'PART',
    'PREP',
    'PRON',
    'PUNCT',
    'SPELL',
    'UNK',
    'VERB',
    'VERB:FORM',
    'VERB:INFL',
    'VERB:SVA',
    'VERB:TENSE',
    'WO',
]

# Translation table from operation prefixes (M/R/U) and category tags to the
# Chinese text shown to the user as the highlight label.
eng2zh = {
    'M': '漏掉',
    'R': '換成',
    'U': '多餘的',
    'ADJ': '形容詞',
    'ADJ:FORM': '形容詞形(比較級或最高級)',
    'ADV': '副詞',
    'CONJ': '連接詞',
    'CONTR': '縮寫',
    'DET': ' 限定詞(冠詞、指示詞、所有格)',
    'MORPH': '語尾變化(詞性、單複數、拼字)',
    'NOUN': '名詞',
    'NOUN:INFL': '名詞語尾變化(可不可數、單複數、拼字)',
    'NOUN:NUM': '名詞單複數',
    'NOUN:POSS': '名詞所有格',
    'ORTH': '大小寫',
    'OTHER': '換其它的用法',
    'PART': '介副詞',
    'PREP': '介詞',
    'PRON': '代名詞',
    'PUNCT': '標點',
    'SPELL': '拼字',
    'UNK': '難以歸類',
    'VERB': '動詞',
    'VERB:FORM': '動詞形',
    'VERB:INFL': '動詞詞語尾變化',
    'VERB:SVA': '主詞動詞一致',
    'VERB:TENSE': '動詞時態',
    'WO': '詞序',
}

# One colour per operation prefix.
_prefix_colors = {'M': 'red', 'R': 'blue', 'U': 'green'}

# Label -> colour mapping handed to the HighlightedText components.
# The entity labels built in comp() are the *translated* strings
# (eng2zh[prefix] + ':' + eng2zh[category]), so the map must contain those
# keys for a colour to be matched.  The original English 'M:ADJ'-style keys
# are kept as well so any existing lookup keeps working.
color_map = {}
for pre in ['M', 'R', 'U']:
    for err in error_categories:
        color_map[f'{pre}:{err}'] = _prefix_colors[pre]
        color_map[f'{eng2zh[pre]}:{eng2zh[err]}'] = _prefix_colors[pre]
80
def comp(s1, s2):
    """Annotate the edits between an original and a corrected text.

    Both strings are parsed with the module-level ``annotator`` (with
    ``tokenise=True``) and the edits returned by ``annotator.annotate`` are
    turned into highlight entities.

    Args:
        s1: The original text.
        s2: The corrected text.

    Returns:
        A pair ``(ori_anno, cor_anno)``.  Each is a dict with the keys
        ``'text'`` (the corresponding input string) and ``'entities'`` (a list
        of ``{'entity', 'start', 'end'}`` dicts holding the translated label
        and the character offsets of the edited span in that text).
    """
    source_doc = annotator.parse(s1, tokenise=True)
    target_doc = annotator.parse(s2, tokenise=True)

    source_entities = []
    target_entities = []
    for edit in annotator.annotate(source_doc, target_doc, merging="all-equal"):
        # The edit type is split into its first character (the operation)
        # and everything after the separator at index 1 (the category).
        operation = edit.type[0]
        category = edit.type[2:]
        print(edit.type, operation, category)

        # Edits whose operation or category has no translation are skipped.
        if operation not in eng2zh or category not in eng2zh:
            continue

        label = eng2zh[operation] + ':' + eng2zh[category]
        source_span = source_doc[edit.o_start:edit.o_end]
        target_span = target_doc[edit.c_start:edit.c_end]
        source_entities.append({
            'entity': label,
            'start': source_span.start_char,
            'end': source_span.end_char,
        })
        target_entities.append({
            'entity': label,
            'start': target_span.start_char,
            'end': target_span.end_char,
        })

    return (
        {'text': s1, 'entities': source_entities},
        {'text': s2, 'entities': target_entities},
    )
110
def llama2_all(text):
    """Correct *text* and annotate how the correction differs from it.

    Calls ``llama2_cor`` for the correction, then ``comp`` on the stripped
    original and stripped correction.

    Returns:
        A tuple ``(corrected, original_annotations, corrected_annotations)``;
        the first element is the unstripped result of ``llama2_cor``.
    """
    corrected = llama2_cor(text)
    original_marks, corrected_marks = comp(text.strip(), corrected.strip())
    return corrected, original_marks, corrected_marks
114
def llama2_cor(text):
    """Ask the LLM server for a corrected version of *text*.

    Posts ``{"sentences": text}`` as JSON to ``{llama2_url}/llama2`` and
    returns the ``'sentences'`` field of the JSON reply.

    Args:
        text: The text to correct.

    Returns:
        The value of ``'sentences'`` in the server reply, or the message
        ``"Please retry or reboot the LLM server."`` when the request fails,
        the reply is not valid JSON, or the reply has no ``'sentences'`` field.
    """
    fallback = "Please retry or reboot the LLM server."
    try:
        # The POST itself is inside the try block so that an unreachable
        # server yields the fallback message instead of an unhandled error.
        response = requests.post(f"{llama2_url}/llama2", json={"sentences": text})
        return response.json()['sentences']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network/HTTP-level failure; ValueError: body is
        # not JSON; KeyError/TypeError: JSON without a usable 'sentences' key.
        return fallback
121
# Gradio UI: one tab with an input box, a plain-text output box, and two
# highlighted views (original / corrected) fed by llama2_all.
with gr.Blocks() as demo:
    with gr.Tab("Llama-2-13b-chat"):
        with gr.Row():
            text_input = gr.Textbox(lines=5, label="Input", placeholder="Please enter sentences line by line.")
            text_output = gr.Textbox(lines=5, label="Output")
        with gr.Row():
            # color_map is passed to the constructor instead of the deprecated
            # .style(color_map=...) call; the `line=10` keyword was dropped
            # because HighlightedText has no such parameter.
            text_output01 = gr.HighlightedText(label="Original Text", combine_adjacent=True, color_map=color_map)
            text_output02 = gr.HighlightedText(label="Corrected Text", combine_adjacent=True, color_map=color_map)
        with gr.Row():
            text_button = gr.Button("Correct and Compare")
            text_button1 = gr.Button("Correct")
        # "Correct and Compare" fills all three outputs; "Correct" only the
        # plain-text output.
        text_button.click(llama2_all, inputs=text_input, outputs=[text_output, text_output01, text_output02])
        text_button1.click(llama2_cor, inputs=text_input, outputs=text_output)

# Enable request queuing via .queue() rather than the deprecated
# launch(enable_queue=True) argument.
demo.queue()
demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ errant