Update app.py
Browse files
app.py
CHANGED
@@ -101,21 +101,28 @@ def loadModel():
|
|
101 |
device_map="auto",
|
102 |
quantization_config=quantization_config,
|
103 |
max_memory={
|
104 |
-
0: "
|
105 |
-
1: "
|
106 |
-
2: "
|
107 |
-
3: "
|
108 |
-
"cpu": "5GB" # CPU offload capped at 5GB
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
},
|
110 |
)
|
111 |
-
#model = PeftModel.from_pretrained(
|
112 |
-
# model,
|
113 |
-
# lora_weights,
|
114 |
-
# device_map="auto",
|
115 |
-
# cache_dir='',
|
116 |
-
# torch_dtype=torch.float16,
|
117 |
-
# is_trainable=False,
|
118 |
-
# )
|
119 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
120 |
tokenizer.pad_token = tokenizer.unk_token
|
121 |
print_resources()
|
|
|
101 |
device_map="auto",
|
102 |
quantization_config=quantization_config,
|
103 |
max_memory={
|
104 |
+
0: "10GB", # GPU 0 capped at 10GB
|
105 |
+
1: "22GB", # GPU 1 capped at 22GB
|
106 |
+
2: "22GB", # GPU 2 capped at 22GB
|
107 |
+
3: "22GB", # GPU 3 capped at 22GB
|
108 |
+
#"cpu": "5GB" # CPU offload capped at 5GB (disabled)
|
109 |
+
},
|
110 |
+
)
|
111 |
+
model = PeftModel.from_pretrained(
|
112 |
+
model,
|
113 |
+
lora_weights,
|
114 |
+
device_map="auto",
|
115 |
+
cache_dir='',
|
116 |
+
torch_dtype=torch.float16,
|
117 |
+
is_trainable=False,
|
118 |
+
max_memory={
|
119 |
+
0: "10GB", # GPU 0 capped at 10GB
|
120 |
+
1: "22GB", # GPU 1 capped at 22GB
|
121 |
+
2: "22GB", # GPU 2 capped at 22GB
|
122 |
+
3: "22GB", # GPU 3 capped at 22GB
|
123 |
+
#"cpu": "5GB" # CPU offload capped at 5GB (disabled)
|
124 |
},
|
125 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
127 |
tokenizer.pad_token = tokenizer.unk_token
|
128 |
print_resources()
|