Update app.py
Browse files
app.py
CHANGED
@@ -99,7 +99,11 @@ def loadModel():
|
|
99 |
torch_dtype=torch.float16,
|
100 |
cache_dir=cache_dir,
|
101 |
device_map="auto",
|
102 |
-
quantization_config=quantization_config
|
|
|
|
|
|
|
|
|
103 |
)
|
104 |
#model = PeftModel.from_pretrained(
|
105 |
# model,
|
@@ -111,7 +115,6 @@ def loadModel():
|
|
111 |
# )
|
112 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
113 |
tokenizer.pad_token = tokenizer.unk_token
|
114 |
-
model.to('cuda')
|
115 |
return model, tokenizer
|
116 |
|
117 |
model, tokenizer = loadModel()
|
|
|
99 |
torch_dtype=torch.float16,
|
100 |
cache_dir=cache_dir,
|
101 |
device_map="auto",
|
102 |
+
quantization_config=quantization_config,
|
103 |
+
max_memory={
|
104 |
+
0: "40GB", # GPU 0 capped at 40GB memory
|
105 |
+
"cpu": "40GB" # CPU offload capped at 40GB memory
|
106 |
+
},
|
107 |
)
|
108 |
#model = PeftModel.from_pretrained(
|
109 |
# model,
|
|
|
115 |
# )
|
116 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
117 |
tokenizer.pad_token = tokenizer.unk_token
|
|
|
118 |
return model, tokenizer
|
119 |
|
120 |
model, tokenizer = loadModel()
|