Update app.py
Browse files
app.py
CHANGED
@@ -99,7 +99,11 @@ def loadModel():
|
|
99 |
torch_dtype=torch.float16,
|
100 |
cache_dir=cache_dir,
|
101 |
device_map="auto",
|
102 |
-
quantization_config=quantization_config
|
|
|
|
|
|
|
|
|
103 |
)
|
104 |
#model = PeftModel.from_pretrained(
|
105 |
# model,
|
@@ -111,7 +115,6 @@ def loadModel():
|
|
111 |
# )
|
112 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
113 |
tokenizer.pad_token = tokenizer.unk_token
|
114 |
-
model.to('cuda')
|
115 |
return model, tokenizer
|
116 |
|
117 |
model, tokenizer = loadModel()
|
|
|
99 |
torch_dtype=torch.float16,
|
100 |
cache_dir=cache_dir,
|
101 |
device_map="auto",
|
102 |
+
quantization_config=quantization_config,
|
103 |
+
max_memory={
|
104 |
+
0: "40GB", # GPU 0 capped at 40GB memory
|
105 |
+
"cpu": "40GB" # CPU offload capped at 40GB memory
|
106 |
+
},
|
107 |
)
|
108 |
#model = PeftModel.from_pretrained(
|
109 |
# model,
|
|
|
115 |
# )
|
116 |
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast=False,cache_dir=cache_dir)
|
117 |
tokenizer.pad_token = tokenizer.unk_token
|
|
|
118 |
return model, tokenizer
|
119 |
|
120 |
model, tokenizer = loadModel()
|