Shaltiel committed on
Commit
622f6ab
1 Parent(s): 5a0b659

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +156 -0
README.md CHANGED
@@ -1,3 +1,159 @@
1
  ---
2
  license: cc-by-4.0
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-4.0
3
+ language:
4
+ - he
5
+ inference: false
6
  ---
7
+ # DictaBERT: A State-of-the-Art BERT Suite for Modern Hebrew
8
+
9
+ State-of-the-art language model for Hebrew, described in [this paper](https://arxiv.org/abs/2308.16687).
10
+
11
+ This is the model fine-tuned for the syntactic dependency parsing task.
12
+
13
+ For the bert-base models for other tasks, see [here](https://huggingface.co/collections/dicta-il/dictabert-6588e7cc08f83845fc42a18b).
14
+
15
+ Sample usage:
16
+
17
+ ```python
18
+ from transformers import AutoModel, AutoTokenizer
19
+
20
+ tokenizer = AutoTokenizer.from_pretrained('dicta-il/dictabert-syntax')
21
+ model = AutoModel.from_pretrained('dicta-il/dictabert-syntax', trust_remote_code=True)
22
+
23
+ model.eval()
24
+
25
+ sentence = 'בשנת 1948 השלים אפרים קישון את לימודיו בפיסול מתכת ובתולדות האמנות והחל לפרסם מאמרים הומוריסטיים'
26
+ print(model.predict([sentence], tokenizer))
27
+ ```
28
+
29
+ Output:
30
+ ```json
31
+ [
32
+ {
33
+ "tree": [
34
+ {
35
+ "word": "בשנת",
36
+ "dep_head_idx": 2,
37
+ "dep_head": "השלים",
38
+ "dep_func": "obl"
39
+ },
40
+ {
41
+ "word": "1948",
42
+ "dep_head_idx": 0,
43
+ "dep_head": "בשנת",
44
+ "dep_func": "compound"
45
+ },
46
+ {
47
+ "word": "השלים",
48
+ "dep_head_idx": -1,
49
+ "dep_head": "root",
50
+ "dep_func": "root"
51
+ },
52
+ {
53
+ "word": "אפרים",
54
+ "dep_head_idx": 2,
55
+ "dep_head": "השלים",
56
+ "dep_func": "nsubj"
57
+ },
58
+ {
59
+ "word": "קישון",
60
+ "dep_head_idx": 3,
61
+ "dep_head": "אפרים",
62
+ "dep_func": "flat"
63
+ },
64
+ {
65
+ "word": "את",
66
+ "dep_head_idx": 6,
67
+ "dep_head": "לימודיו",
68
+ "dep_func": "case"
69
+ },
70
+ {
71
+ "word": "לימודיו",
72
+ "dep_head_idx": 2,
73
+ "dep_head": "השלים",
74
+ "dep_func": "obj"
75
+ },
76
+ {
77
+ "word": "בפיסול",
78
+ "dep_head_idx": 6,
79
+ "dep_head": "לימודיו",
80
+ "dep_func": "nmod"
81
+ },
82
+ {
83
+ "word": "מתכת",
84
+ "dep_head_idx": 7,
85
+ "dep_head": "בפיסול",
86
+ "dep_func": "compound"
87
+ },
88
+ {
89
+ "word": "ובתולדות",
90
+ "dep_head_idx": 7,
91
+ "dep_head": "בפיסול",
92
+ "dep_func": "conj"
93
+ },
94
+ {
95
+ "word": "האמנות",
96
+ "dep_head_idx": 9,
97
+ "dep_head": "ובתולדות",
98
+ "dep_func": "compound"
99
+ },
100
+ {
101
+ "word": "והחל",
102
+ "dep_head_idx": 2,
103
+ "dep_head": "השלים",
104
+ "dep_func": "conj"
105
+ },
106
+ {
107
+ "word": "לפרסם",
108
+ "dep_head_idx": 11,
109
+ "dep_head": "והחל",
110
+ "dep_func": "xcomp"
111
+ },
112
+ {
113
+ "word": "מאמרים",
114
+ "dep_head_idx": 12,
115
+ "dep_head": "לפרסם",
116
+ "dep_func": "obj"
117
+ },
118
+ {
119
+ "word": "הומוריסטיים",
120
+ "dep_head_idx": 13,
121
+ "dep_head": "מאמרים",
122
+ "dep_func": "amod"
123
+ }
124
+ ],
125
+ "root_idx": 2
126
+ }
127
+ ]
128
+ ```
129
+
130
+
131
+ ## Citation
132
+
133
+ If you use DictaBERT in your research, please cite ```DictaBERT: A State-of-the-Art BERT Suite for Modern Hebrew```
134
+
135
+ **BibTeX:**
136
+
137
+ ```bibtex
138
+ @misc{shmidman2023dictabert,
139
+ title={DictaBERT: A State-of-the-Art BERT Suite for Modern Hebrew},
140
+ author={Shaltiel Shmidman and Avi Shmidman and Moshe Koppel},
141
+ year={2023},
142
+ eprint={2308.16687},
143
+ archivePrefix={arXiv},
144
+ primaryClass={cs.CL}
145
+ }
146
+ ```
147
+
148
+ ## License
149
+
150
+ Shield: [![CC BY 4.0][cc-by-shield]][cc-by]
151
+
152
+ This work is licensed under a
153
+ [Creative Commons Attribution 4.0 International License][cc-by].
154
+
155
+ [![CC BY 4.0][cc-by-image]][cc-by]
156
+
157
+ [cc-by]: https://creativecommons.org/licenses/by/4.0/
158
+ [cc-by-image]: https://i.creativecommons.org/l/by/4.0/88x31.png
159
+ [cc-by-shield]: https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg