michaelfeil
committed on
Commit
•
eba8a47
1
Parent(s):
727e312
Update README.md
Browse files
README.md
CHANGED
@@ -106,15 +106,38 @@ tags:
|
|
106 |
- ctranslate2
|
107 |
---
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
Converted on 5/13/23 to CTranslate2
|
110 |
```bash
|
111 |
export ORG="facebook"
|
112 |
export NAME="m2m100_PARAMS"
|
113 |
ct2-transformers-converter --model "$ORG/$NAME" --copy_files .gitattributes README.md generation_config.json sentencepiece.bpe.model special_tokens_map.json tokenizer_config.json vocab.json --quantization float16
|
114 |
```
|
115 |
-
|
116 |
-
|
117 |
-
quantized version of facebook/m2m100_1.2B
|
118 |
|
119 |
```python
|
120 |
import ctranslate2
|
@@ -132,33 +155,7 @@ target = results[0].hypotheses[0][1:]
|
|
132 |
print(tokenizer.decode(tokenizer.convert_tokens_to_ids(target)))
|
133 |
```
|
134 |
|
135 |
-
Alternative:
|
136 |
-
pip install hf_hub_ctranslate2>=1.0.0 ctranslate2>=3.13.0
|
137 |
-
|
138 |
-
Checkpoint compatible to ctranslate2 and hf-hub-ctranslate2
|
139 |
|
140 |
-
compute_type=int8_float16 for device="cuda"
|
141 |
-
compute_type=int8 for device="cpu"
|
142 |
-
```python
|
143 |
-
from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
|
144 |
-
|
145 |
-
model_name = "michaelfeil/ct2fast-m2m100_PARAMS"
|
146 |
-
model = TranslatorCT2fromHfHub(
|
147 |
-
# load in int8 on CUDA
|
148 |
-
model_name_or_path=model_name,
|
149 |
-
device="cuda",
|
150 |
-
compute_type="int8_float16"
|
151 |
-
)
|
152 |
-
model.tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_PARAMS")
|
153 |
-
outputs = model.generate(
|
154 |
-
text=["Translate to german: How are you doing?"],
|
155 |
-
min_decoding_length=24,
|
156 |
-
max_decoding_length=32,
|
157 |
-
max_input_length=512,
|
158 |
-
beam_size=5
|
159 |
-
)
|
160 |
-
print(outputs)
|
161 |
-
```
|
162 |
# Original: M2M100 418M
|
163 |
|
164 |
M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many multilingual translation.
|
|
|
106 |
- ctranslate2
|
107 |
---
|
108 |
|
109 |
+
# Fast Inference with CTranslate2
|
110 |
+
Speed up inference by 2x-8x using int8 inference in C++
|
111 |
+
quantized version of facebook/m2m100_1.2B
|
112 |
+
|
113 |
+
pip install hf_hub_ctranslate2>=1.0.3 ctranslate2>=3.13.0
|
114 |
+
|
115 |
+
```python
|
116 |
+
from hf_hub_ctranslate2 import MultiLingualTranslatorCT2fromHfHub
|
117 |
+
|
118 |
+
model = MultiLingualTranslatorCT2fromHfHub(
|
119 |
+
model_name_or_path="michaelfeil/ct2fast-m2m100_PARAMS", device="cpu", compute_type="int8",
|
120 |
+
tokenizer=AutoTokenizer.from_pretrained(f"facebook/m2m100_418M")
|
121 |
+
)
|
122 |
+
|
123 |
+
outputs = model.generate(
|
124 |
+
["How do you call a fast Flamingo?", "Wie geht es dir?"],
|
125 |
+
src_lang=["en", "de"],
|
126 |
+
tgt_lang=["de", "fr"]
|
127 |
+
)
|
128 |
+
```
|
129 |
+
|
130 |
+
compute_type=int8_float16 for device="cuda"
|
131 |
+
compute_type=int8 for device="cpu"
|
132 |
+
|
133 |
Converted on 5/13/23 to CTranslate2
|
134 |
```bash
|
135 |
export ORG="facebook"
|
136 |
export NAME="m2m100_PARAMS"
|
137 |
ct2-transformers-converter --model "$ORG/$NAME" --copy_files .gitattributes README.md generation_config.json sentencepiece.bpe.model special_tokens_map.json tokenizer_config.json vocab.json --quantization float16
|
138 |
```
|
139 |
+
|
140 |
+
Alternative:
|
|
|
141 |
|
142 |
```python
|
143 |
import ctranslate2
|
|
|
155 |
print(tokenizer.decode(tokenizer.convert_tokens_to_ids(target)))
|
156 |
```
|
157 |
|
|
|
|
|
|
|
|
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
# Original: M2M100 418M
|
160 |
|
161 |
M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many multilingual translation.
|