Add updated app.py
app.py CHANGED
@@ -3,6 +3,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
 import torch
 import bitsandbytes as bnb  # Required for 4-bit quantization
 
+# Check if CUDA is available, and decide on the device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 # Load the tokenizer and the quantized LLaMA model
 model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -11,12 +14,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     load_in_4bit=True,  # Enable 4-bit quantization
-    device_map="auto"
+    device_map="auto" if device == "cuda" else {"": "cpu"}  # Use auto if CUDA is available, else fallback to CPU
 )
 
-# Enable native 2x faster inference (if applicable, ensure this feature works)
-# FastLanguageModel.for_inference(model)  # Uncomment this if FastLanguageModel is available for your model
-
 # Streamlit interface
 st.title("Keyword Extractor using LLaMA 4-bit Model")
 
@@ -32,7 +32,7 @@ if user_input:
     alpaca_prompt = prompt_template.format(user_input)
 
     # Tokenize the input text
-    inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda")
+    inputs = tokenizer([alpaca_prompt], return_tensors="pt").to(device)
 
     # Set up the text streamer to display the generated text as it streams
     text_streamer = TextStreamer(tokenizer)
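
For context, here is a minimal sketch of how the changed lines fit together at generation time. The diff only covers model loading and tokenization; the prompt text, the max_new_tokens value, and the model.generate() call below are illustrative assumptions, not part of this commit.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# Pick the device up front so every later step can share it
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,  # 4-bit weights via bitsandbytes, as in the app
    device_map="auto" if device == "cuda" else {"": "cpu"},
)

# Hypothetical prompt; the app builds alpaca_prompt from Streamlit user input
prompt = "Extract keywords from the following text:\nLLMs can run in 4-bit precision."

inputs = tokenizer([prompt], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer)  # prints tokens to stdout as they are generated
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=64)

One caveat on the CPU branch: bitsandbytes 4-bit loading has historically required a CUDA GPU, so on a CPU-only Space the load_in_4bit=True path may still fail at load time; dropping quantization when no GPU is present is a common fallback.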