CjangCjengh committed on
Commit
d69485a
·
1 Parent(s): 76c418d

add app.py

Browse files
Files changed (2) hide show
  1. app.py +56 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModel, AutoTokenizer
4
+
5
+
6
# Hub repository that provides both the model weights and the tokenizer.
model_path = 'CjangCjengh/NomBert-hn2qn-v0.1'

# Use the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the model, then switch it to evaluation mode and move it to the
# selected device.
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype='auto',
    trust_remote_code=True,
)
model = model.eval().to(device)

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)
11
+
12
def _render_candidates_html(char_items):
    """Render per-character candidate probabilities as an HTML fragment.

    Args:
        char_items: iterable of mappings, each with a ``'char'`` entry and a
            ``'candidates'`` entry holding ``(candidate, prob)`` pairs.

    Returns:
        A single HTML string: one heading per character followed by a
        flex-wrapped row of labelled probability bars.
    """
    # Local import keeps this block self-contained.
    from html import escape

    parts = ['<div>']
    for item in char_items:
        # The character originates from user input, so it must be escaped
        # before being placed into markup.
        char = escape(str(item['char']))
        parts.append(f'<h3>{char}</h3>')
        parts.append("<div style='display: flex; flex-wrap: wrap; gap: 10px;'>")

        for candidate, prob in item['candidates']:
            label = escape(str(candidate))
            prob_percent = prob * 100
            parts.append(f'''
            <div style='margin-bottom: 15px; width: 170px;'>
                <div style='margin-bottom: 5px;'>{label}: {prob_percent:.2f}%</div>
                <div style='background-color: #f0f0f0; width: 100%; height: 15px; border-radius: 3px;'>
                    <div style='background-color: #4caf50; width: {prob_percent}%; height: 100%; border-radius: 3px;'></div>
                </div>
            </div>
            ''')
        parts.append('</div>')
    parts.append('</div>')

    # Join once instead of growing a string with repeated ``+=``.
    return ''.join(parts)


def parse_text(input_text):
    """Convert the given text with the model and build the candidates view.

    Args:
        input_text: text taken from the input textbox.

    Returns:
        A ``(text, html)`` tuple: the first converted string returned by
        ``model.parse_nom_text`` and an HTML fragment visualising the
        candidates and their probabilities for each character.
    """
    # Nothing to convert: skip the model call and return an empty panel.
    if not input_text:
        return '', '<div></div>'

    with torch.inference_mode():
        output_text, output_probs = model.parse_nom_text(tokenizer, [input_text])

    # A single text was submitted, so only the first result is used.
    return output_text[0], _render_candidates_html(output_probs[0])
37
+
38
if __name__ == '__main__':
    # Styles the element with id "viz": fixed height with vertical scrolling.
    viz_css = '#viz {height: 500px; overflow-y: scroll;}'

    with gr.Blocks(css=viz_css) as demo:
        gr.Markdown(value='## NomBERT - Hán Nôm to Quốc Ngữ Converter')

        with gr.Row():
            # Left column: input box, trigger button and converted text.
            with gr.Column(scale=1):
                nom_box = gr.Textbox(
                    lines=5,
                    label='Input Hán Nôm Text',
                    placeholder='Enter Hán Nôm text here...',
                )
                convert_button = gr.Button(value='Parse')
                quoc_ngu_box = gr.Textbox(
                    lines=5,
                    label='Output Quốc Ngữ Text',
                    interactive=False,
                )

            # Right column: HTML panel filled with the candidates markup.
            with gr.Column(scale=2):
                candidates_panel = gr.HTML(
                    label='Candidates Probabilities',
                    elem_id='viz',
                )

        # Clicking the button runs parse_text and fills both outputs.
        convert_button.click(
            parse_text,
            nom_box,
            [quoc_ngu_box, candidates_panel],
        )

    demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ transformers==4.49.0