# Interactive question-answering REPL over a fine-tuned causal language model.
#
# Reads questions from stdin, wraps each one in a "Q: ...\nA:" prompt, lets the
# model continue the text, and prints the decoded result. Type "exit" or
# "quit" (any case), or send EOF / Ctrl-C, to leave the loop.

MODEL_NAME = "gpt2-finetuned-qa"
MAX_NEW_TOKENS = 32
EXIT_COMMANDS = frozenset({"exit", "quit"})


def is_exit_command(text: str) -> bool:
    """Return True if *text* is one of the commands that end the session.

    Matching ignores case and surrounding whitespace.
    """
    return text.strip().lower() in EXIT_COMMANDS


def build_prompt(question: str) -> str:
    """Return the prompt string fed to the model for *question*."""
    return f"Q: {question}\nA:"


def main() -> None:
    """Load the model and tokenizer, then answer questions until the user exits."""
    # Imported here rather than at module level so that the pure helpers above
    # stay importable without pulling in (or requiring) the transformers stack.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    while True:
        try:
            question = input("Q: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed/piped stdin or Ctrl-C: finish the prompt line and leave
            # quietly instead of dumping a traceback.
            print()
            break

        if is_exit_command(question):
            break
        if not question:
            # Nothing to answer; don't spend a generation pass on it.
            continue

        inputs = tokenizer(build_prompt(question), return_tensors="pt")
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            # Reuse the EOS token id as the pad token id for generation.
            pad_token_id=tokenizer.eos_token_id,
        )
        # The whole first output sequence is decoded as-is, with special
        # tokens stripped.
        print(tokenizer.decode(outputs[0], skip_special_tokens=True))
        print()


if __name__ == "__main__":
    main()