Spaces:

mamkkl
/

demo1

Paused

mamkkl committed on Jan 10

Commit

7c2ae70

verified ·

1 Parent(s): bc1f27f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -102,10 +102,10 @@ def loadModel():
                     quantization_config=quantization_config,
                     max_memory={
                         0: "5GB",  # GPU 0 with 20GB memory
-                        1: "20GB",  # GPU 0 with 20GB memory
-                        2: "20GB",  # GPU 0 with 20GB memory
-                        3: "20GB",  # GPU 0 with 20GB memory
-                        "cpu": "40GB"  # CPU with 100GB memory
                     },
                 )
         #model = PeftModel.from_pretrained(
@@ -137,7 +137,6 @@ def respond(
     print_resources()
     input_ids = inputs["input_ids"].cuda()
     max_new_tokens = 512
-    print_resources()
     generation_config = GenerationConfig(
             temperature=0.1,
             top_p=0.75,

                     quantization_config=quantization_config,
                     max_memory={
                         0: "5GB",  # GPU 0 with 20GB memory
+                        1: "40GB",  # GPU 0 with 20GB memory
+                        2: "40GB",  # GPU 0 with 20GB memory
+                        3: "40GB",  # GPU 0 with 20GB memory
+                        "cpu": "5GB"  # CPU with 100GB memory
                     },
                 )
         #model = PeftModel.from_pretrained(
     print_resources()
     input_ids = inputs["input_ids"].cuda()
     max_new_tokens = 512
     generation_config = GenerationConfig(
             temperature=0.1,
             top_p=0.75,