fajjos committed
Commit 59410ee · 1 Parent(s): f3f2c42

Add updated app.py

Files changed (1)
app.py +5 -5
app.py CHANGED
@@ -3,6 +3,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
 import torch
 import bitsandbytes as bnb # Required for 4-bit quantization
 
+# Check if CUDA is available, and decide on the device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 # Load the tokenizer and the quantized LLaMA model
 model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -11,12 +14,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     load_in_4bit=True, # Enable 4-bit quantization
-    device_map="auto" # Automatically assigns to CPU/GPU
+    device_map="auto" if device == "cuda" else {"": "cpu"} # Use auto if CUDA is available, else fallback to CPU
 )
 
-# Enable native 2x faster inference (if applicable, ensure this feature works)
-# FastLanguageModel.for_inference(model) # Uncomment this if FastLanguageModel is available for your model
-
 # Streamlit interface
 st.title("Keyword Extractor using LLaMA 4-bit Model")
 
@@ -32,7 +32,7 @@ if user_input:
     alpaca_prompt = prompt_template.format(user_input)
 
     # Tokenize the input text
-    inputs = tokenizer([alpaca_prompt], return_tensors="pt").to(device)
+    inputs = tokenizer([alpaca_prompt], return_tensors="pt").to(device)
 
     # Set up the text streamer to display the generated text as it streams
     text_streamer = TextStreamer(tokenizer)
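
The diff ends at the TextStreamer construction; the generate() call that consumes it sits outside the changed hunks. Below is a minimal sketch of how these pieces typically fit together after this commit. The prompt text, skip_prompt=True, max_new_tokens, and the final decode are illustrative assumptions, not lines from app.py:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# Same device selection this commit introduces
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto" if device == "cuda" else {"": "cpu"},
)

# Hypothetical prompt; app.py builds its prompt from prompt_template and user_input
prompt = "Extract keywords from: Streamlit lets you build data apps in Python."
inputs = tokenizer([prompt], return_tensors="pt").to(device)

# TextStreamer prints tokens to stdout as they are generated; generate()
# accepts it through the `streamer` keyword argument.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
output_ids = model.generate(**inputs, streamer=text_streamer, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Two caveats on the fallback itself: the {"": "cpu"} device_map assigns the entire module tree to the CPU (the empty string is accelerate's root-module key), and bitsandbytes 4-bit kernels have historically required a CUDA device, so load_in_4bit=True may still fail at load time on CPU-only hardware despite this change.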