Sentinel-AI-Web-Search-Test

Sleeping

File size: 941 Bytes

8f4e927
753d9d8
664e897
753d9d8
 
a65ba38
753d9d8
 
34054e0
753d9d8
 
a65ba38
753d9d8
b38068f
a65ba38
 
8f4e927
 
34054e0
753d9d8
8f4e927
34054e0
753d9d8
a65ba38
 
753d9d8
34054e0
 
753d9d8

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Minimal inference script: load a locally saved quantized causal language
# model together with its tokenizer, run one deterministic generation on a
# sample prompt, and print the decoded text.

# Path to the locally saved quantized model directory
model_path = '/path/to/your/quantized_model_directory'

# Load the tokenizer from the same directory as the model. The previous code
# passed an undefined name (`model_name`) here, which raised NameError before
# anything was loaded.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load quantized model
quantized_model = AutoModelForCausalLM.from_pretrained(model_path)

# Check if a GPU is available and move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): some quantization backends reject an explicit `.to(device)`
# call -- confirm against how this checkpoint was quantized.
quantized_model.to(device)

# Example text input
text_input = "How did Tesla perform in Q1 2024?"

# Tokenize input and place the resulting tensors on the same device as the
# model so that generate() does not hit a device mismatch.
inputs = tokenizer(text_input, return_tensors="pt").to(device)

# Generate response. do_sample=False disables sampling; max_length=150 is
# passed through unchanged from the original script.
outputs = quantized_model.generate(**inputs, max_length=150, do_sample=False)

# Decode the first (only) generated sequence to a readable string
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print generated response
print(f"Generated response: {response}")