File size: 795 Bytes
20ddcfb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
---
language:
- sw
---
```python
alpaca_prompt = """Hapo chini kuna maelezo ya kazi, pamoja na maelezo ya ziada yanayotoa muktadha zaidi. Andika jibu ambalo linakamilisha ombi hilo ipasavyo.
### Maelezo:
{}
### Ziada:
{}
### Jibu:
{}"""
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
instructions = examples["instruction"]
inputs = examples["input"]
outputs = examples["output"]
texts = []
for instruction, input, output in zip(instructions, inputs, outputs):
# Must add EOS_TOKEN, otherwise your generation will go on forever!
text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
texts.append(text)
return { "text" : texts, }
pass
from datasets import load_dataset
``` |