Update README.md
README.md CHANGED
@@ -315,4 +315,32 @@ def generate(text):
```python
# Now you can simply call the generate function with an English text you want to translate:
generate("I'm super excited about this Norwegian NORA model! Can it translate these sentences?")
# > this should output: 'Jeg er super spent på denne norske NORA modellen! Kan den oversette disse setningene?'
```

## Example usage on a GPU with ~16GB VRAM (try for yourself [in Google Colab](https://colab.research.google.com/drive/1AQgJ8lN-SNOqkUKj4xpQI5rr0R7V2Xzy?usp=sharing))

Install bitsandbytes and accelerate if you want to load the model in 8-bit:

```bash
pip install bitsandbytes
pip install accelerate
```

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(
    "norallm/normistral-7b-scratch"
)

# This setup needs about 8 GB of VRAM
# Setting `load_in_8bit=False` -> 15 GB of VRAM
# Using `torch.float32` and `load_in_8bit=False` -> 21 GB of VRAM
model = AutoModelForCausalLM.from_pretrained(
    "norallm/normistral-7b-scratch",
    device_map='auto',
    load_in_8bit=True,
    torch_dtype=torch.bfloat16
)
```
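The snippet above stops once the model is in memory. As a quick sanity check of the 8-bit setup, a short generation call can follow it. The sketch below is not part of the original README: the prompt string and generation settings are illustrative assumptions, and it uses only standard `transformers` calls (`tokenizer(...)`, `model.generate`, `tokenizer.decode`).

```python
# Illustrative sanity check for the 8-bit model loaded above.
# NOTE: the prompt format and generation settings here are assumptions,
# not taken from the original README.
prompt = "Engelsk: I'm super excited about this Norwegian NORA model!\nBokmål:"

# Tokenize and move the input IDs to the model's device
# (`device_map='auto'` may have dispatched the weights across devices).
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

# Greedy decoding; `max_new_tokens` caps the length of the completion.
output = model.generate(input_ids, max_new_tokens=64, do_sample=False)

# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output[0, input_ids.shape[1]:], skip_special_tokens=True))
```

Note that recent `transformers` versions prefer `quantization_config=BitsAndBytesConfig(load_in_8bit=True)` over the bare `load_in_8bit=True` argument shown above; both load the same 8-bit weights.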