import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
import torch
import bitsandbytes as bnb # Required for 4-bit quantization
# Load the tokenizer and the quantized LLaMA model
model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the quantized LLaMA model in 4-bit precision via an explicit
# BitsAndBytesConfig (passing load_in_4bit directly to from_pretrained is deprecated)
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,  # Enable 4-bit quantization
    device_map="auto",  # Automatically assigns weights to CPU/GPU
)
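# Optional tuning: BitsAndBytesConfig exposes further 4-bit knobs, e.g. NF4
# quantization with a float16 compute dtype. A minimal sketch (values here are
# illustrative defaults, not settings taken from this Space):
#
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )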
# Optional: unsloth's FastLanguageModel can enable faster inference for
# unsloth-exported models (requires the unsloth package)
# from unsloth import FastLanguageModel
# FastLanguageModel.for_inference(model)
# Streamlit interface
st.title("Keyword Extractor using LLaMA 4-bit Model")
# Text input area for user input
user_input = st.text_area("Enter text for keyword extraction")
if user_input:
    # Prepare the prompt for keyword extraction
    prompt_template = (
        "Extract keywords and variables from the prompt:\n"
        "{}\n"
    )
    alpaca_prompt = prompt_template.format(user_input)
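    # Note: Llama-3.2 Instruct checkpoints are normally prompted through the
    # tokenizer's chat template rather than a raw string. A minimal sketch,
    # assuming the standard transformers apply_chat_template API:
    #
    # messages = [{"role": "user", "content": alpaca_prompt}]
    # inputs = tokenizer.apply_chat_template(
    #     messages, add_generation_prompt=True, return_tensors="pt"
    # ).to(model.device)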
    # Tokenize the input text and move it to the model's device
    # (avoids a hard-coded "cuda", which fails on CPU-only hosts)
    inputs = tokenizer([alpaca_prompt], return_tensors="pt").to(model.device)
    # TextStreamer prints tokens to the server console as they are generated;
    # it does not stream into the Streamlit page itself
    text_streamer = TextStreamer(tokenizer)
    # Generate keywords and extracted variables
    with torch.no_grad():
        output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
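    # To stream tokens into the browser instead, transformers' TextIteratorStreamer
    # can feed st.write_stream (available in recent Streamlit releases). A minimal
    # sketch, assuming generation runs in a background thread:
    #
    # from threading import Thread
    # from transformers import TextIteratorStreamer
    # streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=128)).start()
    # st.write_stream(streamer)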
    # Decode only the newly generated tokens, skipping the echoed input prompt
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    # Display the result in the Streamlit app
    st.write("Extracted Keywords and Variables:")
    st.write(generated_text)