from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch
import streamlit as st
# Suppress FutureWarnings raised by the libraries above
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Load the PEFT adapter configuration (not used directly below, but it
# verifies that the adapter repo resolves)
config = PeftConfig.from_pretrained("youssef227/llama-3-8b-Instruct-bnb-telcom-3")
print("step 1")
# Load the base model
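# (The checkpoint below is pre-quantized to 4-bit with bitsandbytes, so
# loading it requires the bitsandbytes package and, in practice, a
# CUDA-capable GPU.)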
base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")
print("step 2")
# Apply PEFT configuration to the base model
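# (PeftModel.from_pretrained attaches the LoRA adapter weights on top of the
# frozen base model; no explicit merge step is required for inference.)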
model = PeftModel.from_pretrained(base_model, "youssef227/llama-3-8b-Instruct-bnb-telcom-3")
print("step 3")
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")
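# Note: Streamlit re-executes the whole script on every interaction, so the
# loads above would normally be wrapped in a cached factory. A sketch (not
# part of the original app, assuming st.cache_resource from Streamlit >= 1.18):
#
#     @st.cache_resource
#     def load_model():
#         base = AutoModelForCausalLM.from_pretrained(
#             "unsloth/llama-3-8b-Instruct-bnb-4bit"
#         )
#         model = PeftModel.from_pretrained(
#             base, "youssef227/llama-3-8b-Instruct-bnb-telcom-3"
#         )
#         tok = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")
#         return model, tok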
# Define the prompt template
alpaca_prompt = "{instruction} {input} {output}"
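# Note: the format string above simply concatenates the three fields. Llama 3
# Instruct checkpoints also ship with a chat template, so an alternative
# (a sketch, not what this app does) would be:
#
#     messages = [
#         {"role": "system", "content": "system prompt here"},
#         {"role": "user", "content": text},
#     ]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )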
def generator(text):
    # Prepare the inputs. The instruction is Egyptian Arabic for: "You are a
    # customer service representative at Vodafone, and here is some
    # information that might help you."
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction="ุงู†ุช ู…ู…ุซู„ ุฎุฏู…ุฉ ุงู„ุนู…ู„ุงุก ู„ุฏู‰ ุดุฑูƒุฉ ููˆุฏุงููˆู†.ูˆ ุฏูŠ ู…ุนู„ูˆู…ุงุช ู…ู…ูƒู† ุชููŠุฏูƒ",  # instruction
                input=text,  # input
                output="",  # output - left blank for generation
            )
        ],
        return_tensors="pt",
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"
    # Generate the output
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    # Decode the output (prompt plus completion)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
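# Note: batch_decode above returns the prompt together with the completion.
# To return only the newly generated text, one option (a sketch, not part of
# the original app) is to slice off the prompt tokens before decoding:
#
#     prompt_len = inputs["input_ids"].shape[1]
#     completion = tokenizer.batch_decode(
#         outputs[:, prompt_len:], skip_special_tokens=True
#     )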
# Streamlit application
st.title("Text Generator")
text = st.text_area('Enter some text!')
if text:
    out = generator(text)
    st.json(out)
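# To run locally: streamlit run app.py (this assumes a CUDA GPU with enough
# memory for the 4-bit 8B checkpoint, plus the peft and bitsandbytes packages).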