Update app.py
Browse files
app.py
CHANGED
@@ -101,21 +101,28 @@ def loadModel():
|
|
101 |
device_map="auto",
|
102 |
quantization_config=quantization_config,
|
103 |
max_memory={
|
104 |
-
0: "
|
105 |
-
1: "
|
106 |
-
2: "
|
107 |
-
3: "
|
108 |
-
"cpu": "5GB" # CPU offload capped at 5GB
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
},
|
110 |
)
|
111 |
-
#model = PeftModel.from_pretrained(
|
112 |
-
# model,
|
113 |
-
# lora_weights,
|
114 |
-
# device_map="auto",
|
115 |
-
# cache_dir='',
|
116 |
-
# torch_dtype=torch.float16,
|
117 |
-
# is_trainable=False,
|
118 |
-
# )
|
119 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
120 |
tokenizer.pad_token = tokenizer.unk_token
|
121 |
print_resources()
|
|
|
101 |
device_map="auto",
|
102 |
quantization_config=quantization_config,
|
103 |
max_memory={
|
104 |
+
0: "10GB", # GPU 0 capped at 10GB
|
105 |
+
1: "22GB", # GPU 1 capped at 22GB
|
106 |
+
2: "22GB", # GPU 2 capped at 22GB
|
107 |
+
3: "22GB", # GPU 3 capped at 22GB
|
108 |
+
#"cpu": "5GB" # CPU offload capped at 5GB (disabled)
|
109 |
+
},
|
110 |
+
)
|
111 |
+
model = PeftModel.from_pretrained(
|
112 |
+
model,
|
113 |
+
lora_weights,
|
114 |
+
device_map="auto",
|
115 |
+
cache_dir='',
|
116 |
+
torch_dtype=torch.float16,
|
117 |
+
is_trainable=False,
|
118 |
+
max_memory={
|
119 |
+
0: "10GB", # GPU 0 capped at 10GB
|
120 |
+
1: "22GB", # GPU 1 capped at 22GB
|
121 |
+
2: "22GB", # GPU 2 capped at 22GB
|
122 |
+
3: "22GB", # GPU 3 capped at 22GB
|
123 |
+
#"cpu": "5GB" # CPU offload capped at 5GB (disabled)
|
124 |
},
|
125 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
127 |
tokenizer.pad_token = tokenizer.unk_token
|
128 |
print_resources()
|