from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch
import streamlit as st

# Suppress FutureWarnings if necessary
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the PEFT (adapter) configuration
config = PeftConfig.from_pretrained("youssef227/llama-3-8b-Instruct-bnb-telcom-3")
print("step 1")

# Load the 4-bit quantized base model; device_map="auto" places the weights on the available GPU(s)
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/llama-3-8b-Instruct-bnb-4bit", device_map="auto"
)
print("step 2")

# Apply the PEFT adapter to the base model
model = PeftModel.from_pretrained(base_model, "youssef227/llama-3-8b-Instruct-bnb-telcom-3")
print("step 3")

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")

# Define the prompt template
alpaca_prompt = "{instruction} {input} {output}"

def generator(text):
    # Context string (unused here; alpaca_prompt has no {context} placeholder)
    context = " "
    # Prepare the inputs
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                # Instruction (Arabic): "You are a customer service representative at Vodafone,
                # and this is information that may help you."
                instruction="انت ممثل خدمة العملاء لدى شركة فودافون.و دي معلومات ممكن تفيدك",
                input=text,
                output="",  # leave the output blank for generation!
            )
        ],
        return_tensors="pt",
    ).to("cuda")
    # Generate the output
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    # Decode the output
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Streamlit application
st.title("Text Generator")
text = st.text_area("Enter some text!")
if text:
    out = generator(text)
    st.json(out)
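
One practical refinement, as a hedged sketch rather than part of the original script: Streamlit reruns the whole file on every interaction, so the base model, adapter, and tokenizer above are reloaded each time the user types. Wrapping the loading code in a function decorated with st.cache_resource (a standard Streamlit API) keeps the weights in memory across reruns; the load_model helper name below is just illustrative, and it would replace the loading lines above rather than run after them.

@st.cache_resource
def load_model():
    # Load the 4-bit base model, attach the PEFT adapter, and load the tokenizer only once
    base = AutoModelForCausalLM.from_pretrained(
        "unsloth/llama-3-8b-Instruct-bnb-4bit", device_map="auto"
    )
    peft_model = PeftModel.from_pretrained(base, "youssef227/llama-3-8b-Instruct-bnb-telcom-3")
    tok = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")
    return peft_model, tok

model, tokenizer = load_model()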