from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


def generate_response(model, tokenizer, messages, max_new_tokens=50):
    # Format the conversation with the model's chat template; Instruct models
    # expect this structure rather than raw concatenated system/user text.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


def main():
    model_name = "meta-llama/Llama-3.2-3B-Instruct"
    system_prompt = "You are a helpful assistant."

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    user_prompt = input("Enter your prompt: ")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    response = generate_response(model, tokenizer, messages)
    print("Response:", response)


if __name__ == "__main__":
    main()