# Streamlit app: keyword extraction with a 4-bit quantized Llama model
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# bitsandbytes is not imported directly, but it must be installed for 4-bit
# quantization (transformers loads it internally); accelerate is likewise
# required for device_map="auto"
# Load the tokenizer; the repo name indicates pre-quantized bnb 4-bit weights
model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the quantized model in 4-bit precision
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,   # enable 4-bit quantization (requires bitsandbytes)
    device_map="auto",   # place weights on GPU if available, else CPU
)
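
# Note: newer transformers releases deprecate passing load_in_4bit directly
# to from_pretrained in favor of an explicit quantization config. A sketch of
# the equivalent call (assuming a transformers version that exposes
# BitsAndBytesConfig):
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )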
# Unsloth's FastLanguageModel.for_inference() enables its native 2x-faster
# inference path, but it only applies when the model is loaded through the
# unsloth library rather than AutoModelForCausalLM, so it stays disabled here.
# from unsloth import FastLanguageModel
# FastLanguageModel.for_inference(model)
# Streamlit interface
st.title("Keyword Extractor using LLaMA 4-bit Model")
# Text input area for user input
user_input = st.text_area("Enter text for keyword extraction")
if user_input:
    # Build the keyword-extraction prompt
    prompt_template = (
        "Extract keywords and variables from the prompt:\n"
        "{}\n"
    )
    alpaca_prompt = prompt_template.format(user_input)
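
    # Note: since this is an instruct-tuned model, the tokenizer's chat
    # template is a reasonable alternative to a raw prompt string. A sketch,
    # assuming the tokenizer ships a chat template:
    #
    #   messages = [{"role": "user", "content": alpaca_prompt}]
    #   input_ids = tokenizer.apply_chat_template(
    #       messages, add_generation_prompt=True, return_tensors="pt"
    #   ).to(model.device)
    #   output = model.generate(input_ids, max_new_tokens=128)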
    # Tokenize the input and move it to the model's device (hard-coding
    # "cuda" would crash on CPU-only machines)
    inputs = tokenizer([alpaca_prompt], return_tensors="pt").to(model.device)

    # TextStreamer prints tokens to the server's stdout as they are
    # generated; it does not stream into the Streamlit page itself
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    # Generate keywords and variables, then decode only the newly generated
    # tokens so the prompt is not echoed back into the output
    with torch.no_grad():
        output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
    generated_text = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    # Display the result in the Streamlit app
    st.write("Extracted Keywords and Variables:")
    st.write(generated_text)
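
# To run locally (assuming the dependencies above are installed and this
# file is saved as app.py):
#   streamlit run app.py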