Spaces:
Sleeping
Sleeping
=
committed on
Commit
·
c00de41
1
Parent(s):
d78d7ae
send application for test
Browse files
app.py
CHANGED
@@ -25,7 +25,12 @@ models = {
|
|
25 |
"checkpoints": "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3",
|
26 |
"tokenizer": "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json",
|
27 |
"max_len": 51
|
28 |
-
}
|
|
|
|
|
|
|
|
|
|
|
29 |
}
|
30 |
}
|
31 |
|
@@ -43,36 +48,62 @@ temperature = st.sidebar.slider("How randomly need you the translated sentences
|
|
43 |
|
44 |
# make the process
|
45 |
try:
|
46 |
-
# recuperate checkpoints
|
47 |
-
checkpoints = torch.load(os.path.join(models[version][translation_type]['checkpoints'], "best_checkpoints.pth"), map_location=torch.device('cpu'))
|
48 |
-
|
49 |
-
# recuperate the tokenizer
|
50 |
-
tokenizer_file = models[version][translation_type]['tokenizer']
|
51 |
|
52 |
# recuperate the max length
|
53 |
max_len = models[version][translation_type]['max_len']
|
54 |
|
55 |
# let us get the best model
|
56 |
@st.cache_resource
|
57 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
# initialize the tokenizer
|
60 |
tokenizer = T5TokenizerFast(tokenizer_file=tokenizer_file)
|
61 |
|
62 |
-
|
63 |
-
model_name = 't5-small'
|
64 |
-
|
65 |
-
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
66 |
|
67 |
# resize the token embeddings
|
68 |
model.resize_token_embeddings(len(tokenizer))
|
69 |
|
70 |
model.load_state_dict(checkpoints['model_state_dict'])
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
return model, tokenizer
|
74 |
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
# set the model to eval mode
|
78 |
_ = model.eval()
|
|
|
25 |
"checkpoints": "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3",
|
26 |
"tokenizer": "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json",
|
27 |
"max_len": 51
|
28 |
+
},
|
29 |
+
"Wolof ➡️ French": {
|
30 |
+
"checkpoints": "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_wf_v3",
|
31 |
+
"tokenizer": "wolof-translate/wolof_translate/trokenizers/t5_tokenizers/tokenizer_v3.json",
|
32 |
+
"max_len": 51
|
33 |
+
}
|
34 |
}
|
35 |
}
|
36 |
|
|
|
48 |
|
49 |
# make the process
|
50 |
try:
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
# recuperate the max length
|
53 |
max_len = models[version][translation_type]['max_len']
|
54 |
|
55 |
# let us get the best model
|
56 |
@st.cache_resource
|
57 |
+
def get_modelfw_v3():
|
58 |
+
|
59 |
+
# recuperate checkpoints
|
60 |
+
checkpoints = torch.load(os.path.join('wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3', "best_checkpoints.pth"), map_location=torch.device('cpu'))
|
61 |
+
|
62 |
+
# recuperate the tokenizer
|
63 |
+
tokenizer_file = "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json"
|
64 |
|
65 |
# initialize the tokenizer
|
66 |
tokenizer = T5TokenizerFast(tokenizer_file=tokenizer_file)
|
67 |
|
68 |
+
model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
|
|
|
|
|
|
69 |
|
70 |
# resize the token embeddings
|
71 |
model.resize_token_embeddings(len(tokenizer))
|
72 |
|
73 |
model.load_state_dict(checkpoints['model_state_dict'])
|
74 |
|
75 |
+
return model, tokenizer
|
76 |
+
|
77 |
+
@st.cache_resource
|
78 |
+
def get_modelwf_v3():
|
79 |
+
|
80 |
+
# recuperate checkpoints
|
81 |
+
checkpoints = torch.load(os.path.join('wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_wf_v3', "best_checkpoints.pth"), map_location=torch.device('cpu'))
|
82 |
+
|
83 |
+
# recuperate the tokenizer
|
84 |
+
tokenizer_file = "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json"
|
85 |
+
|
86 |
+
# initialize the tokenizer
|
87 |
+
tokenizer = T5TokenizerFast(tokenizer_file=tokenizer_file)
|
88 |
+
|
89 |
+
model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
90 |
+
|
91 |
+
# resize the token embeddings
|
92 |
+
model.resize_token_embeddings(len(tokenizer))
|
93 |
+
|
94 |
+
model.load_state_dict(checkpoints['model_state_dict'])
|
95 |
|
96 |
return model, tokenizer
|
97 |
|
98 |
+
if version == "Version ☝️":
|
99 |
+
|
100 |
+
if translation_type == "French ➡️ Wolof":
|
101 |
+
|
102 |
+
model, tokenizer = get_modelfw_v3()
|
103 |
+
|
104 |
+
elif translation_type == "Wolof ➡️ French":
|
105 |
+
|
106 |
+
model, tokenizer = get_modelwf_v3()
|
107 |
|
108 |
# set the model to eval mode
|
109 |
_ = model.eval()
|
wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_wf_v3/best_checkpoints.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metrics": {
|
3 |
+
"train_loss": 0.010352298853033019,
|
4 |
+
"test_loss": 0.5878886595368386,
|
5 |
+
"bleu": 22.9625,
|
6 |
+
"gen_len": 9.3562,
|
7 |
+
"current_epoch": 94
|
8 |
+
},
|
9 |
+
"best_performance": {
|
10 |
+
"best_score": 22.9625,
|
11 |
+
"best_epoch": 94
|
12 |
+
}
|
13 |
+
}
|
wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_wf_v3/best_checkpoints.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0427e418e75d5842f8b95ebe9025e91d6e16dd79ab7d6f5815320e239e8b350f
|
3 |
+
size 180980359
|