from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch
import streamlit as st

# Suppress FutureWarnings if necessary
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the PEFT (adapter) configuration
config = PeftConfig.from_pretrained("youssef227/llama-3-8b-Instruct-bnb-telcom-3")
print("step 1")

# Load the 4-bit quantized base model; device_map="auto" places the weights on the available GPU(s)
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/llama-3-8b-Instruct-bnb-4bit", device_map="auto"
)
print("step 2")

# Apply the PEFT adapter to the base model
model = PeftModel.from_pretrained(base_model, "youssef227/llama-3-8b-Instruct-bnb-telcom-3")
print("step 3")

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")

# Define the prompt template
alpaca_prompt = "{instruction} {input} {output}"

def generator(text):
    # Context string (unused here; alpaca_prompt has no {context} placeholder)
    context = " "
    # Prepare the inputs
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                # Instruction (Arabic): "You are a customer service representative at Vodafone,
                # and this is information that may help you."
                instruction="انت ممثل خدمة العملاء لدى شركة فودافون.و دي معلومات ممكن تفيدك",
                input=text,
                output="",  # leave the output blank for generation!
            )
        ],
        return_tensors="pt",
    ).to("cuda")
    # Generate the output
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    # Decode the output
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Streamlit application
st.title("Text Generator")
text = st.text_area("Enter some text!")
if text:
    out = generator(text)
    st.json(out)
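
One practical refinement, as a hedged sketch rather than part of the original script: Streamlit reruns the whole file on every interaction, so the base model, adapter, and tokenizer above are reloaded each time the user types. Wrapping the loading code in a function decorated with st.cache_resource (a standard Streamlit API) keeps the weights in memory across reruns; the load_model helper name below is just illustrative, and it would replace the loading lines above rather than run after them.

@st.cache_resource
def load_model():
    # Load the 4-bit base model, attach the PEFT adapter, and load the tokenizer only once
    base = AutoModelForCausalLM.from_pretrained(
        "unsloth/llama-3-8b-Instruct-bnb-4bit", device_map="auto"
    )
    peft_model = PeftModel.from_pretrained(base, "youssef227/llama-3-8b-Instruct-bnb-telcom-3")
    tok = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")
    return peft_model, tok

model, tokenizer = load_model()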