Update README.md
Browse files
README.md
CHANGED
@@ -35,7 +35,7 @@ Translate a sentence using python
|
|
35 |
import ctranslate2
|
36 |
import pyonmttok
|
37 |
from huggingface_hub import snapshot_download
|
38 |
-
model_dir = snapshot_download(repo_id="projecte-aina/
|
39 |
|
40 |
tokenizer=pyonmttok.Tokenizer(mode="none", sp_model_path = model_dir + "/spm.model")
|
41 |
tokenized=tokenizer.tokenize("Welcome to the Aina Project!")
|
@@ -89,7 +89,7 @@ The model was trained on a combination of the following datasets:
|
|
89 |
|
90 |
#### Tokenization
|
91 |
|
92 |
-
All data is tokenized using sentencepiece, using 50 thousand token sentencepiece model
|
93 |
This model is included.
|
94 |
|
95 |
#### Hyperparameters
|
|
|
35 |
import ctranslate2
|
36 |
import pyonmttok
|
37 |
from huggingface_hub import snapshot_download
|
38 |
+
model_dir = snapshot_download(repo_id="projecte-aina/aina-translator-en-ca", revision="main")
|
39 |
|
40 |
tokenizer=pyonmttok.Tokenizer(mode="none", sp_model_path = model_dir + "/spm.model")
|
41 |
tokenized=tokenizer.tokenize("Welcome to the Aina Project!")
|
|
|
89 |
|
90 |
#### Tokenization
|
91 |
|
92 |
+
All data is tokenized with a 50-thousand-token SentencePiece model learned from the combination of all filtered training data.
|
93 |
This model is included.
|
94 |
|
95 |
#### Hyperparameters
|