Spaces:
Sleeping
Sleeping
Commit
·
97aced8
1
Parent(s):
f00bbd2
2.11.0
Browse files- README copy.md +12 -0
- app.py +135 -0
- requirements.txt +1 -0
README copy.md
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Grammar Corrector
|
3 |
+
emoji: 🏢
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.39.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
import os
|
4 |
+
import subprocess
|
5 |
+
# Download the spaCy English model at start-up (presumably required by the
# ERRANT annotator loaded further down -- confirm against the errant version).
subprocess.run(["python3", "-m", "spacy", "download", "en"])

# Sample request payload; not referenced by the functions visible in this file.
data = {
    "sentences": [
        "I am an good boy.",
        "I wanted to going to supermarket.",
    ]
}

# Base URL of the LLM correction service; raises KeyError when the
# 'url' environment variable is not set.
llama2_url = os.environ['url']
|
8 |
+
|
9 |
+
# UJ
|
10 |
+
import json
|
11 |
+
import errant
|
12 |
+
from tqdm import tqdm
|
13 |
+
import random
|
14 |
+
from difflib import Differ
|
15 |
+
|
16 |
+
# Shared ERRANT annotator for English, created once at import time and
# reused by comp() for parsing sentences and extracting edits.
annotator = errant.load('en')
|
17 |
+
|
18 |
+
|
19 |
+
# Error-category codes, i.e. the part of an edit type that follows the
# one-letter operation prefix (e.g. the 'VERB:TENSE' in 'R:VERB:TENSE').
error_categories = [
    'ADJ',
    'ADJ:FORM',
    'ADV',
    'CONJ',
    'CONTR',
    'DET',
    'MORPH',
    'NOUN',
    'NOUN:INFL',
    'NOUN:NUM',
    'NOUN:POSS',
    'ORTH',
    'OTHER',
    'PART',
    'PREP',
    'PRON',
    'PUNCT',
    'SPELL',
    'UNK',
    'VERB',
    'VERB:FORM',
    'VERB:INFL',
    'VERB:SVA',
    'VERB:TENSE',
    'WO',
]

# Traditional-Chinese display text for the operation prefixes (M/R/U) and
# for every error category above. These strings are shown to the user.
eng2zh = {
    'M': '漏掉',
    'R': '換成',
    'U': '多餘的',
    'ADJ': '形容詞',
    'ADJ:FORM': '形容詞形(比較級或最高級)',
    'ADV': '副詞',
    'CONJ': '連接詞',
    'CONTR': '縮寫',
    'DET': ' 限定詞(冠詞、指示詞、所有格)',
    'MORPH': '語尾變化(詞性、單複數、拼字)',
    'NOUN': '名詞',
    'NOUN:INFL': '名詞語尾變化(可不可數、單複數、拼字)',
    'NOUN:NUM': '名詞單複數',
    'NOUN:POSS': '名詞所有格',
    'ORTH': '大小寫',
    'OTHER': '換其它的用法',
    'PART': '介副詞',
    'PREP': '介詞',
    'PRON': '代名詞',
    'PUNCT': '標點',
    'SPELL': '拼字',
    'UNK': '難以歸類',
    'VERB': '動詞',
    'VERB:FORM': '動詞形',
    'VERB:INFL': '動詞詞語尾變化',
    'VERB:SVA': '主詞動詞一致',
    'VERB:TENSE': '動詞時態',
    'WO': '詞序',
}

# One colour per edit operation: M -> red, R -> blue, U -> green.
_operation_colors = {'M': 'red', 'R': 'blue', 'U': 'green'}

# Label -> colour lookup for the highlighted-text widgets.
# comp() labels entities with the translated text
# (eng2zh[prefix] + ':' + eng2zh[category]), so the map must contain those
# translated labels or no highlight ever receives its colour. The original
# English 'M:ADJ'-style keys are kept as well for backward compatibility.
color_map = {}
for pre, color in _operation_colors.items():
    for err in error_categories:
        color_map[f'{pre}:{err}'] = color
        color_map[f'{eng2zh[pre]}:{eng2zh[err]}'] = color
|
80 |
+
def comp(s1, s2):
    """Annotate the edits that turn sentence *s1* into sentence *s2*.

    Both strings are parsed with the module-level ``annotator`` and the edits
    between them are extracted with ``merging="all-equal"``. For every edit
    whose operation prefix (first character of ``edit.type``) and category
    (everything after the second character) both have an entry in
    ``eng2zh``, one entity is recorded on each side.

    Args:
        s1: The original text.
        s2: The corrected text.

    Returns:
        A ``(ori_anno, cor_anno)`` pair of dicts, each shaped as
        ``{'text': <str>, 'entities': [{'entity', 'start', 'end'}, ...]}``.
        ``start``/``end`` are taken from the ``start_char``/``end_char`` of
        the token span the edit covers in the respective parsed text.
    """
    source_doc = annotator.parse(s1, tokenise=True)
    target_doc = annotator.parse(s2, tokenise=True)

    ori_anno = {'text': s1, 'entities': []}
    cor_anno = {'text': s2, 'entities': []}

    for edit in annotator.annotate(source_doc, target_doc, merging="all-equal"):
        typ, content = edit.type[0], edit.type[2:]
        print(edit.type, typ, content)

        # Skip edit types that have no translation for either part.
        if typ not in eng2zh or content not in eng2zh:
            continue

        label = eng2zh[typ] + ':' + eng2zh[content]
        source_span = source_doc[edit.o_start:edit.o_end]
        target_span = target_doc[edit.c_start:edit.c_end]

        ori_anno['entities'].append({
            'entity': label,
            'start': source_span.start_char,
            'end': source_span.end_char,
        })
        cor_anno['entities'].append({
            'entity': label,
            'start': target_span.start_char,
            'end': target_span.end_char,
        })

    return ori_anno, cor_anno
|
110 |
+
def llama2_all(text):
    """Correct *text* and annotate how the correction differs from it.

    Args:
        text: The user-supplied text.

    Returns:
        A 3-tuple ``(corrected, original_annotation, corrected_annotation)``:
        the value returned by ``llama2_cor`` followed by the two annotation
        dicts that ``comp`` produces for the whitespace-stripped original and
        corrected texts.
    """
    corrected = llama2_cor(text)
    original_marks, corrected_marks = comp(text.strip(), corrected.strip())
    return corrected, original_marks, corrected_marks
|
114 |
+
def llama2_cor(text):
    """Send *text* to the LLM service and return its corrected version.

    The text is POSTed as ``{"sentences": text}`` to ``{llama2_url}/llama2``
    and the ``'sentences'`` field of the JSON reply is returned.

    Args:
        text: The text to correct.

    Returns:
        The ``'sentences'`` value from the service's JSON response, or the
        fixed message ``"Please retry or reboot the LLM server."`` when the
        request fails or the response cannot be interpreted.
    """
    payload = {"sentences": text}
    try:
        # The request itself is inside the try so that an unreachable server
        # yields the fallback message instead of an unhandled exception.
        # NOTE: no timeout is passed, so a hung server blocks this call.
        response = requests.post(f"{llama2_url}/llama2", json=payload)
        return response.json()['sentences']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network/HTTP failure; ValueError: body is not
        # JSON; KeyError/TypeError: JSON lacks a 'sentences' field or is not
        # an object. KeyboardInterrupt/SystemExit deliberately propagate.
        return "Please retry or reboot the LLM server."
|
121 |
+
# Gradio front end: one tab with an input box, a plain-text output box, two
# highlighted views (original vs. corrected) and two action buttons.
with gr.Blocks() as demo:
    with gr.Tab("Llama-2-13b-chat"):
        with gr.Row():
            text_input = gr.Textbox(
                lines=5,
                label="Input",
                placeholder="Please enter sentences line by line.",
            )
            text_output = gr.Textbox(lines=5, label="Output")

        with gr.Row():
            text_output01 = gr.HighlightedText(
                label="Original Text",
                combine_adjacent=True,
                line=10,
            ).style(color_map=color_map)
            text_output02 = gr.HighlightedText(
                label="Corrected Text",
                combine_adjacent=True,
                line=10,
            ).style(color_map=color_map)

        with gr.Row():
            text_button = gr.Button("Correct and Compare")
            text_button1 = gr.Button("Correct")

        # "Correct and Compare" fills the text output and both highlighted
        # views; "Correct" fills only the text output.
        text_button.click(
            llama2_all,
            inputs=text_input,
            outputs=[text_output, text_output01, text_output02],
        )
        text_button1.click(llama2_cor, inputs=text_input, outputs=text_output)

demo.launch(enable_queue=True)
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
errant
|