from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("/content/TSLAM-4B", use_auth_token=None)
model = AutoModelForCausalLM.from_pretrained("/content/TSLAM-4B", use_auth_token=None)
# Set device and move the model to it (the inputs are moved to the same device below)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Example text input
text_input = "How is QoS applied on routers?"
# Build the prompt in the model's chat format
prompt = f"""
<|system|>
You are a helpful assistant.<|end|>
<|user|>{text_input}<|end|>
<|assistant|>
"""
# Tokenize the prompt and move the input tensors to the device
inputs = tokenizer(prompt, return_tensors="pt").to(device)
print("User Query: " + text_input)
# Generate text on the device (max_length counts prompt plus generated tokens)
outputs = model.generate(**inputs, max_length=2000, num_return_sequences=1)
print("Model Response: ") | |
# Decode generated text | |
for output in outputs: | |
generated_text = tokenizer.decode(output, skip_special_tokens=True) | |
print(generated_text) |
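
# Optional: a minimal sketch (not part of the original snippet) for printing only
# the model's reply without the echoed prompt. It assumes the prompt tokens form
# a prefix of each generated sequence, which holds for generate() as called above.
prompt_length = inputs["input_ids"].shape[1]
for output in outputs:
    reply = tokenizer.decode(output[prompt_length:], skip_special_tokens=True)
    print(reply)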