import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model and the fine-tuned PEFT adapter to load on top of it.
base_model = "google/gemma-2b-it"
adapter_model = "isimorfizam/logs"

# Load the base model, then attach the adapter weights.
model = AutoModelForCausalLM.from_pretrained(base_model)
model = PeftModel.from_pretrained(model, adapter_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = model.to("cuda")


def predict(question):
    # input_pretext = 'Answer the following question for me.'
    input_text = question
    input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
    # max_length caps prompt + generated tokens combined.
    outputs = model.generate(**input_ids, max_length=100)
    # skip_special_tokens drops markers like <bos>/<eos> from the answer.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


gradio_app = gr.Interface(
    predict,
    inputs="text",   # gr.Interface expects component names, not Python types
    outputs="text",
    title="QA",
)

if __name__ == "__main__":
    gradio_app.launch()
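
# --- Usage sketch (not part of the original script) ---
# Once the app is running, it can also be queried programmatically with the
# gradio_client package; the local URL and port below are assumptions based
# on Gradio's defaults:
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")
#     answer = client.predict("What does this log entry mean?", api_name="/predict")
#     print(answer)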