# Streamlit app: keyword extraction with a 4-bit quantized Llama model
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# bitsandbytes is not imported directly, but it must be installed for 4-bit
# quantization (transformers loads it internally); accelerate is likewise
# required for device_map="auto"
# Load the tokenizer; the repo name indicates pre-quantized bnb 4-bit weights
model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the quantized model in 4-bit precision
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,   # enable 4-bit quantization (requires bitsandbytes)
    device_map="auto",   # place weights on GPU if available, else CPU
)
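
# Note: newer transformers releases deprecate passing load_in_4bit directly
# to from_pretrained in favor of an explicit quantization config. A sketch of
# the equivalent call (assuming a transformers version that exposes
# BitsAndBytesConfig):
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )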
# Unsloth's FastLanguageModel.for_inference() enables its native 2x-faster
# inference path, but it only applies when the model is loaded through the
# unsloth library rather than AutoModelForCausalLM, so it stays disabled here.
# from unsloth import FastLanguageModel
# FastLanguageModel.for_inference(model)
# Streamlit interface
st.title("Keyword Extractor using LLaMA 4-bit Model")
# Text input area for user input
user_input = st.text_area("Enter text for keyword extraction")
if user_input:
    # Build the keyword-extraction prompt
    prompt_template = (
        "Extract keywords and variables from the prompt:\n"
        "{}\n"
    )
    alpaca_prompt = prompt_template.format(user_input)
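
    # Note: since this is an instruct-tuned model, the tokenizer's chat
    # template is a reasonable alternative to a raw prompt string. A sketch,
    # assuming the tokenizer ships a chat template:
    #
    #   messages = [{"role": "user", "content": alpaca_prompt}]
    #   input_ids = tokenizer.apply_chat_template(
    #       messages, add_generation_prompt=True, return_tensors="pt"
    #   ).to(model.device)
    #   output = model.generate(input_ids, max_new_tokens=128)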
    # Tokenize the input and move it to the model's device (hard-coding
    # "cuda" would crash on CPU-only machines)
    inputs = tokenizer([alpaca_prompt], return_tensors="pt").to(model.device)

    # TextStreamer prints tokens to the server's stdout as they are
    # generated; it does not stream into the Streamlit page itself
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    # Generate keywords and variables, then decode only the newly generated
    # tokens so the prompt is not echoed back into the output
    with torch.no_grad():
        output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
    generated_text = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    # Display the result in the Streamlit app
    st.write("Extracted Keywords and Variables:")
    st.write(generated_text)
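
# To run locally (assuming the dependencies above are installed and this
# file is saved as app.py):
#   streamlit run app.py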