Update README.md
Browse files
README.md
CHANGED
@@ -1,26 +1,31 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
#
|
20 |
-
|
21 |
-
#
|
22 |
-
# {'char': '
|
23 |
-
# {'char': '
|
24 |
-
# {'char': '
|
25 |
-
# ]
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
language:
|
4 |
+
- vi
|
5 |
+
---
|
6 |
+
```python
|
7 |
+
import torch
|
8 |
+
from transformers import AutoModel, AutoTokenizer
|
9 |
+
|
10 |
+
model_path = 'CjangCjengh/NomBert-hn2qn-v0.1'
|
11 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU when no CUDA GPU is present
|
12 |
+
|
13 |
+
model = AutoModel.from_pretrained(model_path, torch_dtype='auto', trust_remote_code=True).eval().to(device)
|
14 |
+
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
15 |
+
|
16 |
+
with torch.inference_mode():
|
17 |
+
output_text, output_probs = model.parse_nom_text(tokenizer, ['仍調𬖉𧡊㐌𤴬疸𢚸'])
|
18 |
+
print(output_text[0])
|
19 |
+
# những điều trông thấy đã đau đớn lòng
|
20 |
+
print(output_probs[0])
|
21 |
+
# [
|
22 |
+
# {'char': '仍', 'candidates': [('những', 0.5237383842468262), ('nhưng', 0.475042462348938), ('dưng', 0.0008663760963827372), ('nhang', 0.00022805406479164958), ('dừng', 8.42325171106495e-05), ('nhẵng', 1.6380783563363366e-05), ('nhùng', 1.5950208762660623e-05), ('nhửng', 3.0440487535088323e-06), ('nhăng', 2.9528700906666927e-06), ('nhẳng', 1.0688020211091498e-06), ('nhừng', 5.84112399337755e-07), ('nhâng', 5.119333650327462e-07)]},
|
23 |
+
# {'char': '調', 'candidates': [('điều', 0.8831620812416077), ('đều', 0.11558306217193604), ('điệu', 0.0012446790933609009), ('dìu', 8.889981472748332e-06), ('điu', 7.615183221787447e-07), ('đìu', 5.942594043517602e-07)]},
|
24 |
+
# {'char': '𬖉', 'candidates': [('trông', 1.0)]},
|
25 |
+
# {'char': '𧡊', 'candidates': [('thấy', 1.0)]},
|
26 |
+
# {'char': '㐌', 'candidates': [('đã', 0.9998464584350586), ('dã', 0.00014108473260421306), ('đà', 1.2395633348205592e-05)]},
|
27 |
+
# {'char': '𤴬', 'candidates': [('đau', 0.9999825954437256), ('đáu', 1.744620021781884e-05)]},
|
28 |
+
# {'char': '疸', 'candidates': [('đớn', 0.9998302459716797), ('đơn', 0.00014517175441142172), ('đảm', 2.457975824654568e-05)]},
|
29 |
+
# {'char': '𢚸', 'candidates': [('lòng', 1.0)]}
|
30 |
+
# ]
|
31 |
+
```
|