Updated inference script
README.md
Krutrim Large Language Model (LLM) is a 2 trillion token multilingual foundation model.

| Model Name | Release Date | Release Note | Reference |
|------------|--------------|--------------|-----------|
| Krutrim-1-Instruct | 2024-01-31 | SFT on Krutrim-1 Base | [Here](https://huggingface.co/krutrim-ai-labs/Krutrim-1-instruct) |

## Data Freshness

...

## Usage

To run this model, clone the repository and install its dependencies:

```
git clone https://github.com/ola-krutrim/Krutrim-1-7B.git
cd Krutrim-1-7B
pip install -r requirements.txt
```

To test the base model, you can run

```
python inference/inference.py
```

To test batch inference with the instruct model, you can run

```
python inference/batch_inference.py
```

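That script is not reproduced in this README. As a rough sketch of what padded batch generation with `transformers` can look like (the prompts, padding choices, and generation settings below are illustrative assumptions, not the contents of `inference/batch_inference.py`):

```
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "krutrim-ai-labs/Krutrim-1-instruct"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Decoder-only models are padded on the left so that generation
# continues from the end of each prompt, not from padding tokens.
tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # assumption: reuse EOS as PAD

# Illustrative prompts, not taken from the repo's script
prompts = ["Who are you?", "Name three Indian languages."]
inputs = tokenizer(prompts, return_tensors="pt", padding=True)
inputs.pop("token_type_ids", None)

outputs = model.generate(**inputs, max_new_tokens=64)
responses = tokenizer.batch_decode(outputs, skip_special_tokens=True)
```
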
To use the instruct model, you can load it with `AutoModelForCausalLM` as follows:
```
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "krutrim-ai-labs/Krutrim-1-instruct"

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Select the device used for inference and move the model onto it
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Apply chat template
chat_template = "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|SYSTEM|> ' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|USER|> ' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|RESPONSE|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|RESPONSE|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|RESPONSE|>\n' }}{% endif %}{% endfor %}"
tokenizer.chat_template = chat_template

prompt_dict = [
    {"role": "system", "content": "You are an AI assistant."},
    {"role": "user", "content": "Who are you?"}
]

prompts = tokenizer.apply_chat_template(prompt_dict, add_generation_prompt=True, tokenize=False)
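
# With the two-message conversation above, this template renders the
# prompt string as (shown for illustration; the exact value is what
# apply_chat_template returns):
#
#   <|SYSTEM|> You are an AI assistant.
#   <|USER|> Who are you?
#   <|RESPONSE|>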
inputs = tokenizer(prompts, return_tensors='pt').to(device)
inputs.pop("token_type_ids", None)

# Generate response
outputs = model.generate(
    **inputs,
    max_length=100
)

# Decode the full sequence (prompt plus generated tokens)
response = tokenizer.decode(outputs[0])
```
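
If you only want the model's reply without the echoed prompt, a common pattern is to slice off the prompt tokens before decoding. This is an illustrative addition rather than part of the original snippet; it reuses `inputs`, `outputs`, and `tokenizer` from above:

```
# Decode only the newly generated tokens
prompt_len = inputs["input_ids"].shape[1]
reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
print(reply)
```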