Update app.py
app.py CHANGED
@@ -56,9 +56,9 @@ from peft import LoraConfig, get_peft_model
 # Load LLaMA 2 model in 4-bit mode to save memory
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-
-
-    device_map="cpu",
+    load_in_4bit=True,  # Use 4-bit quantization for efficiency
+    device_map="auto",
+    #device_map="cpu",
     quantization_config=None
 )

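Note: passing load_in_4bit=True while leaving quantization_config=None works on older transformers releases, but recent releases prefer routing 4-bit loading through a BitsAndBytesConfig passed as quantization_config. A minimal sketch of the equivalent call under that approach; the model name below is a placeholder, since app.py defines its own model_name earlier:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "meta-llama/Llama-2-7b-hf"  # placeholder; app.py sets its own model_name

# 4-bit quantization settings matching the intent of load_in_4bit=True
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize weights to 4 bits to save memory
    bnb_4bit_quant_type="nf4",             # NF4 is the usual choice alongside LoRA/PEFT
    bnb_4bit_compute_dtype=torch.float16,  # run compute in fp16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,  # replaces load_in_4bit=True / quantization_config=None
    device_map="auto",               # place layers across available devices automatically
)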