import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch
import bitsandbytes as bnb  # Not used directly, but bitsandbytes must be installed for 4-bit loading

# Check if CUDA is available, and decide on the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pre-quantized (bitsandbytes 4-bit) LLaMA checkpoint and its tokenizer
model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the quantized LLaMA model in 4-bit precision.
# Note: bitsandbytes 4-bit inference effectively requires a CUDA GPU;
# the CPU branch below is only a best-effort fallback and may be very slow or fail.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,    # Enable 4-bit quantization (this checkpoint is already stored in bnb 4-bit)
    device_map="auto" if device == "cuda" else {"": "cpu"}  # Place layers on the GPU automatically if CUDA is available, else keep on CPU
)
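
# The load_in_4bit kwarg above is the legacy loading API. A minimal sketch of the
# BitsAndBytesConfig-based loading that newer transformers versions prefer
# (assumes a CUDA GPU; the quant type and compute dtype below are illustrative choices):
#
# from transformers import BitsAndBytesConfig
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, quantization_config=bnb_config, device_map="auto"
# )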

# Streamlit interface
st.title("Keyword Extractor using LLaMA 4-bit Model")

# Text input area for user input
user_input = st.text_area("Enter text for keyword extraction")

if user_input:
    # Build a simple instruction prompt for keyword extraction
    prompt_template = (
        "Extract keywords and variables from the following text:\n"
        "{}\n"
    )
    prompt = prompt_template.format(user_input)
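
    # Llama-3.2 Instruct models are normally prompted through the tokenizer's chat
    # template rather than raw text. A sketch of that variant, which would replace the
    # tokenization below (assumes a transformers version where apply_chat_template
    # supports return_dict):
    #
    # messages = [{"role": "user", "content": prompt}]
    # inputs = tokenizer.apply_chat_template(
    #     messages, add_generation_prompt=True, return_dict=True, return_tensors="pt"
    # ).to(device)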

    # Tokenize the prompt and move the tensors to the chosen device
    inputs = tokenizer([prompt], return_tensors="pt").to(device)

    # TextStreamer prints tokens to the server console as they are generated;
    # it does not stream into the Streamlit page (see the sketch at the end of the file)
    text_streamer = TextStreamer(tokenizer)

    # Generate keywords and extract variables
    with torch.no_grad():
        output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

    # Decode only the newly generated tokens (output[0] also contains the echoed prompt)
    generated_text = tokenizer.decode(
        output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )

    # Display the result in the Streamlit app
    st.write("Extracted Keywords and Variables:")
    st.write(generated_text)
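
# How to run (assuming this file is saved as app.py):
#   streamlit run app.py
#
# Streaming into the page instead of the server console: a minimal sketch using
# TextIteratorStreamer plus st.write_stream (requires a Streamlit version that
# provides st.write_stream). It would replace the generate/decode/write block above:
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# iter_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# generation_kwargs = dict(**inputs, streamer=iter_streamer, max_new_tokens=128)
# Thread(target=model.generate, kwargs=generation_kwargs).start()
# st.write("Extracted Keywords and Variables:")
# st.write_stream(iter_streamer)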