Spaces:
Running
Running
CjangCjengh
committed on
Commit
·
d69485a
1
Parent(s):
76c418d
add app.py
Browse files- app.py +56 -0
- requirements.txt +1 -0
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from transformers import AutoModel, AutoTokenizer
|
4 |
+
|
5 |
+
|
6 |
+
# Identifier handed to both from_pretrained() calls below.
model_path = 'CjangCjengh/NomBert-hn2qn-v0.1'

# Use the GPU when torch reports CUDA support; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# trust_remote_code=True allows transformers to execute Python code shipped
# with the checkpoint. NOTE(review): presumably this is where the
# parse_nom_text() method used by parse_text() comes from -- confirm.
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype='auto',
    trust_remote_code=True,
)
# Switch to evaluation mode, then move the weights to the selected device.
model = model.eval().to(device)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
11 |
+
|
12 |
+
def parse_text(input_text):
    """Convert one input string and render its candidate probabilities as HTML.

    The text is sent to ``model.parse_nom_text`` as a batch of one, using the
    module-level ``model`` and ``tokenizer``.

    Args:
        input_text: The text taken from the input textbox.

    Returns:
        A ``(text, html)`` tuple: the first converted string returned by the
        model, and an HTML fragment with one heading per item and one labelled
        probability bar per candidate.
    """
    # Function-scope import so this block carries its own dependency.
    from html import escape

    # Pure inference: no autograd bookkeeping is needed.
    with torch.inference_mode():
        output_text, output_probs = model.parse_nom_text(tokenizer, [input_text])

    # Collect fragments and join once instead of growing a string with +=.
    parts = ['<div>']
    # Index 0 selects the results for the single string in the batch.
    for item in output_probs[0]:
        # Escape before interpolating: this text ends up in raw HTML, and
        # characters such as '<' or '&' would otherwise break or inject markup.
        # NOTE(review): presumably 'char' echoes a character of the user's
        # input -- confirm against parse_nom_text.
        char = escape(str(item['char']))
        candidates = item['candidates']
        parts.append(f'<h3>{char}</h3>')
        parts.append("<div style='display: flex; flex-wrap: wrap; gap: 10px;'>")

        for candidate, prob in candidates:
            label = escape(str(candidate))
            # Scale to a percentage; used for both the label and the bar width.
            prob_percent = prob * 100
            parts.append(
                "\n"
                "<div style='margin-bottom: 15px; width: 170px;'>\n"
                f"<div style='margin-bottom: 5px;'>{label}: {prob_percent:.2f}%</div>\n"
                "<div style='background-color: #f0f0f0; width: 100%; height: 15px; border-radius: 3px;'>\n"
                f"<div style='background-color: #4caf50; width: {prob_percent}%; height: 100%; border-radius: 3px;'></div>\n"
                "</div>\n"
                "</div>\n"
            )
        parts.append('</div>')
    parts.append('</div>')

    return output_text[0], ''.join(parts)
|
37 |
+
|
38 |
+
if __name__ == '__main__':
    # The CSS rule targets the element with id "viz" (the HTML panel below):
    # fixed height with vertical scrolling.
    with gr.Blocks(css='#viz {height: 500px; overflow-y: scroll;}') as app:
        gr.Markdown('## NomBERT - Hán Nôm to Quốc Ngữ Converter')

        with gr.Row():
            # Narrower column: input box, trigger button, converted text.
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label='Input Hán Nôm Text',
                    lines=5,
                    placeholder='Enter Hán Nôm text here...',
                )
                parse_button = gr.Button('Parse')
                output_text = gr.Textbox(
                    label='Output Quốc Ngữ Text',
                    lines=5,
                    interactive=False,
                )

            # Wider column: per-character candidate probability bars.
            with gr.Column(scale=2):
                visualization = gr.HTML(
                    label='Candidates Probabilities',
                    elem_id='viz',
                )

        # parse_text returns (text, html), matching the two outputs in order.
        parse_button.click(
            fn=parse_text,
            inputs=input_text,
            outputs=[output_text, visualization],
        )

    # NOTE(review): launch() is placed after the Blocks context closes --
    # confirm this matches the intended nesting.
    app.launch()
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
transformers==4.49.0
|