Shaltiel committed on
Commit
75209df
1 Parent(s): cb3a4de

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +187 -0
README.md CHANGED
@@ -1,3 +1,190 @@
1
  ---
2
  license: cc-by-4.0
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-4.0
3
+ language:
4
+ - he
5
  ---
6
+ # DictaBERT: A State-of-the-Art BERT Suite for Modern Hebrew
7
+
8
+ State-of-the-art language model for Hebrew, as released [here](https://arxiv.org/abs/2308.16687).
9
+
10
+ This is the fine-tuned model for the morphological tagging task.
11
+
12
+ Sample usage:
13
+
14
+ ```python
15
+ from transformers import AutoModel, AutoTokenizer
16
+
17
+ tokenizer = AutoTokenizer.from_pretrained('dicta-il/dictabert-morph')
18
+ model = AutoModel.from_pretrained('dicta-il/dictabert-morph', trust_remote_code=True)
19
+
20
+ model.eval()
21
+
22
+ sentence = 'בשנת 1948 השלים אפרים קישון את לימודיו בפיסול מתכת ובתולדות האמנות והחל לפרסם מאמרים הומוריסטיים'
23
+ print(model.predict([sentence], tokenizer))
24
+
25
+ ```
26
+
27
+ Output:
28
+ ```json
29
+ [{
30
+ "text": "בשנת 1948 השלים אפרים קישון את לימודיו בפיסול מתכת ובתולדות האמנות והחל לפרסם מאמרים הומוריסטיים",
31
+ "tokens": [{
32
+ "token": "בשנת",
33
+ "pos": "NOUN",
34
+ "feats": {
35
+ "Gender": "Fem",
36
+ "Number": "Sing"
37
+ },
38
+ "prefixes": ["ADP"],
39
+ "suffix": false
40
+ }, {
41
+ "token": "1948",
42
+ "pos": "NUM",
43
+ "feats": {},
44
+ "prefixes": [],
45
+ "suffix": false
46
+ }, {
47
+ "token": "השלים",
48
+ "pos": "VERB",
49
+ "feats": {
50
+ "Gender": "Masc",
51
+ "Number": "Sing",
52
+ "Person": "3",
53
+ "Tense": "Past"
54
+ },
55
+ "prefixes": [],
56
+ "suffix": false
57
+ }, {
58
+ "token": "אפרים",
59
+ "pos": "PROPN",
60
+ "feats": {},
61
+ "prefixes": [],
62
+ "suffix": false
63
+ }, {
64
+ "token": "קישון",
65
+ "pos": "PROPN",
66
+ "feats": {},
67
+ "prefixes": [],
68
+ "suffix": false
69
+ }, {
70
+ "token": "את",
71
+ "pos": "ADP",
72
+ "feats": {},
73
+ "prefixes": [],
74
+ "suffix": false
75
+ }, {
76
+ "token": "לימודיו",
77
+ "pos": "NOUN",
78
+ "feats": {
79
+ "Gender": "Masc",
80
+ "Number": "Plur"
81
+ },
82
+ "prefixes": [],
83
+ "suffix": "PRON",
84
+ "suffix_feats": {
85
+ "Gender": "Masc",
86
+ "Number": "Sing",
87
+ "Person": "3"
88
+ }
89
+ }, {
90
+ "token": "בפיסול",
91
+ "pos": "NOUN",
92
+ "feats": {
93
+ "Gender": "Masc",
94
+ "Number": "Sing"
95
+ },
96
+ "prefixes": ["ADP"],
97
+ "suffix": false
98
+ }, {
99
+ "token": "מתכת",
100
+ "pos": "NOUN",
101
+ "feats": {
102
+ "Gender": "Fem",
103
+ "Number": "Sing"
104
+ },
105
+ "prefixes": [],
106
+ "suffix": false
107
+ }, {
108
+ "token": "ובתולדות",
109
+ "pos": "NOUN",
110
+ "feats": {
111
+ "Gender": "Fem",
112
+ "Number": "Plur"
113
+ },
114
+ "prefixes": ["CCONJ", "ADP"],
115
+ "suffix": false
116
+ }, {
117
+ "token": "האמנות",
118
+ "pos": "NOUN",
119
+ "feats": {
120
+ "Gender": "Fem",
121
+ "Number": "Sing"
122
+ },
123
+ "prefixes": ["DET"],
124
+ "suffix": false
125
+ }, {
126
+ "token": "והחל",
127
+ "pos": "VERB",
128
+ "feats": {
129
+ "Gender": "Masc",
130
+ "Number": "Sing",
131
+ "Person": "3",
132
+ "Tense": "Past"
133
+ },
134
+ "prefixes": ["CCONJ"],
135
+ "suffix": false
136
+ }, {
137
+ "token": "לפרסם",
138
+ "pos": "VERB",
139
+ "feats": {},
140
+ "prefixes": [],
141
+ "suffix": false
142
+ }, {
143
+ "token": "מאמרים",
144
+ "pos": "NOUN",
145
+ "feats": {
146
+ "Gender": "Masc",
147
+ "Number": "Plur"
148
+ },
149
+ "prefixes": [],
150
+ "suffix": false
151
+ }, {
152
+ "token": "הומוריסטיים",
153
+ "pos": "ADJ",
154
+ "feats": {
155
+ "Gender": "Masc",
156
+ "Number": "Plur"
157
+ },
158
+ "prefixes": [],
159
+ "suffix": false
160
+ }]
161
+ }]
162
+ ```
163
+
164
+
165
+ ## Citation
166
+
167
+ If you use DictaBERT in your research, please cite *DictaBERT: A State-of-the-Art BERT Suite for Modern Hebrew*.
168
+
169
+ **BibTeX:**
170
+
171
+ To add
172
+
173
+ ## License
174
+
175
+ Shield: [![CC BY 4.0][cc-by-shield]][cc-by]
176
+
177
+ This work is licensed under a
178
+ [Creative Commons Attribution 4.0 International License][cc-by].
179
+
180
+ [![CC BY 4.0][cc-by-image]][cc-by]
181
+
182
+ [cc-by]: http://creativecommons.org/licenses/by/4.0/
183
+ [cc-by-image]: https://i.creativecommons.org/l/by/4.0/88x31.png
184
+ [cc-by-shield]: https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg
185
+
186
+
187
+
188
+
189
+
190
+