Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- ms
|
4 |
+
---
|
5 |
+
# Translation Small T5
|
6 |
+
|
7 |
+
Trained with a context length of 2048 tokens, this model can translate Malay, English, Javanese, Banjarese and Indonesian into the target language. It is also able to preserve the structure of the input text and translate only the parts that need translating, e.g., in text that contains programming code.
|
8 |
+
|
9 |
+
Added more code-translation data and applied heavy post-filtering.
|
10 |
+
|
11 |
+
## how-to
|
12 |
+
|
13 |
+
```python
|
14 |
+
from transformers import T5ForConditionalGeneration, AutoTokenizer
|
15 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
16 |
+
'mesolitica/translation-t5-small-standard-bahasa-cased-code',
|
17 |
+
use_fast=False
|
18 |
+
)
|
19 |
+
model = T5ForConditionalGeneration.from_pretrained(
|
20 |
+
'mesolitica/translation-t5-small-standard-bahasa-cased-code'
|
21 |
+
)
|
22 |
+
|
23 |
+
answer = """
|
24 |
+
First, let's start with implementing the `is_number` function, which checks whether the given Variant is number type or not. It checks the type of the Variant and returns whether it is an integer or a real number.
|
25 |
+
|
26 |
+
```cpp
|
27 |
+
#include <cmath>
|
28 |
+
#include <string>
|
29 |
+
|
30 |
+
namespace godot {
|
31 |
+
|
32 |
+
// ... (other Variant function declarations here)
|
33 |
+
|
34 |
+
class Variant {
|
35 |
+
|
36 |
+
// ... (other Variant function definitions here)
|
37 |
+
|
38 |
+
public:
|
39 |
+
bool is_number() const {
|
40 |
+
switch (get_type()) {
|
41 |
+
case INT:
|
42 |
+
case REAL:
|
43 |
+
return true;
|
44 |
+
default:
|
45 |
+
return false;
|
46 |
+
}
|
47 |
+
}
|
48 |
+
|
49 |
+
// ... (other Variant function definitions here)
|
50 |
+
|
51 |
+
};
|
52 |
+
|
53 |
+
} // namespace godot
|
54 |
+
|
55 |
+
```
|
56 |
+
|
57 |
+
Next, we can create the `apply_operation` function, which takes a `Variant` object, an operation (either "+" or "-"), and a `Variant` operand as the input, applying the given operation to the original `Variant` object and the operand if the original `Variant` object is a number. If the `Variant` object is not a number, the function should throw an exception.
|
58 |
+
|
59 |
+
```cpp
|
60 |
+
#include <stdexcept>
|
61 |
+
#include <string>
|
62 |
+
|
63 |
+
// ... (other Variant and function declarations here)
|
64 |
+
|
65 |
+
Variant apply_operation(Variant a, const std::string& operation, Variant b) {
|
66 |
+
if (!a.is_number()) {
|
67 |
+
throw std::invalid_argument("The first Variant is not a number.");
|
68 |
+
}
|
69 |
+
|
70 |
+
if (operation != "+" && operation != "-") {
|
71 |
+
throw std::invalid_argument("Invalid operation. Supported operations are '+' and '-'.");
|
72 |
+
}
|
73 |
+
|
74 |
+
if (a.get_type() == INT) {
|
75 |
+
if (b.is_number()) {
|
76 |
+
if (b.get_type() == INT) {
|
77 |
+
a = a.operator int() + b.operator int();
|
78 |
+
} else { // REAL
|
79 |
+
a = a.operator int() + b.operator double();
|
80 |
+
}
|
81 |
+
} else {
|
82 |
+
throw std::invalid_argument("The second Variant is not a number.");
|
83 |
+
}
|
84 |
+
} else { // REAL
|
85 |
+
if (b.is_number()) {
|
86 |
+
if (b.get_type() == INT) {
|
87 |
+
a = a.operator double() + b.operator int();
|
88 |
+
} else { // REAL
|
89 |
+
a = a.operator double() + b.operator double();
|
90 |
+
}
|
91 |
+
} else {
|
92 |
+
throw std::invalid_argument("The second Variant is not a number.");
|
93 |
+
}
|
94 |
+
}
|
95 |
+
|
96 |
+
if (operation == "-") {
|
97 |
+
a = -a;
|
98 |
+
}
|
99 |
+
|
100 |
+
return a;
|
101 |
+
}
|
102 |
+
|
103 |
+
```
|
104 |
+
"""
|
105 |
+
|
106 |
+
input_ids = tokenizer.encode(f'terjemah ke Melayu: {answer.strip()}', return_tensors = 'pt').to(model.device)
|
107 |
+
outputs = model.generate(input_ids, max_length = 512)
|
108 |
+
outputs = [o for o in outputs[0] if o not in [0, 1, 2]]
|
109 |
+
print(tokenizer.decode(outputs, spaces_between_special_tokens = False, skip_special_tokens = False))
|
110 |
+
```
|
111 |
+
|
112 |
+
```
|
113 |
+
Pertama, mari kita mulakan dengan melaksanakan fungsi `is_number`, yang memeriksa sama ada Variant yang diberikan adalah jenis nombor atau tidak. Ia memeriksa jenis Variant dan mengembalikan sama ada ia adalah integer atau nombor sebenar.
|
114 |
+
|
115 |
+
```cpp
|
116 |
+
#include <cmath>
|
117 |
+
#include <string>
|
118 |
+
|
119 |
+
namespace godot {
|
120 |
+
|
121 |
+
//... (deklarasi fungsi Variant lain di sini)
|
122 |
+
|
123 |
+
class Variant {
|
124 |
+
|
125 |
+
//... (definisi fungsi Variant lain di sini)
|
126 |
+
|
127 |
+
public:
|
128 |
+
bool is_number() const {
|
129 |
+
switch (get_type()) {
|
130 |
+
case INT:
|
131 |
+
case REAL:
|
132 |
+
return true;
|
133 |
+
default:
|
134 |
+
return false;
|
135 |
+
}
|
136 |
+
}
|
137 |
+
|
138 |
+
//... (definisi fungsi Variant lain di sini)
|
139 |
+
|
140 |
+
};
|
141 |
+
|
142 |
+
} // namespace godot
|
143 |
+
|
144 |
+
```
|
145 |
+
|
146 |
+
Seterusnya, kita boleh membuat fungsi `apply_operation`, yang mengambil objek `Variant`, operasi (sama ada "+" atau "-"), dan operand `Variant` sebagai input, menerapkan operasi yang diberikan ke objek `Variant` asal dan operand jika objek `Variant` asal adalah nombor. Jika objek `Variant` bukan nombor, fungsi harus melemparkan pengecualian.
|
147 |
+
|
148 |
+
```cpp
|
149 |
+
#include <stdexcept>
|
150 |
+
#include <string>
|
151 |
+
|
152 |
+
//
|
153 |
+
import torch
|
154 |
+
```
|