Update app.py
Browse files
app.py
CHANGED
@@ -102,10 +102,10 @@ def loadModel():
|
|
102 |
quantization_config=quantization_config,
|
103 |
max_memory={
|
104 |
0: "5GB", # GPU 0 with 20GB memory
|
105 |
-
1: "
|
106 |
-
2: "
|
107 |
-
3: "
|
108 |
-
"cpu": "
|
109 |
},
|
110 |
)
|
111 |
#model = PeftModel.from_pretrained(
|
@@ -137,7 +137,6 @@ def respond(
|
|
137 |
print_resources()
|
138 |
input_ids = inputs["input_ids"].cuda()
|
139 |
max_new_tokens = 512
|
140 |
-
print_resources()
|
141 |
generation_config = GenerationConfig(
|
142 |
temperature=0.1,
|
143 |
top_p=0.75,
|
|
|
102 |
quantization_config=quantization_config,
|
103 |
max_memory={
|
104 |
0: "5GB", # GPU 0 with 20GB memory
|
105 |
+
1: "40GB", # GPU 1, capped at 40GB
|
106 |
+
2: "40GB", # GPU 2, capped at 40GB
|
107 |
+
3: "40GB", # GPU 3, capped at 40GB
|
108 |
+
"cpu": "5GB" # CPU with 100GB memory
|
109 |
},
|
110 |
)
|
111 |
#model = PeftModel.from_pretrained(
|
|
|
137 |
print_resources()
|
138 |
input_ids = inputs["input_ids"].cuda()
|
139 |
max_new_tokens = 512
|
|
|
140 |
generation_config = GenerationConfig(
|
141 |
temperature=0.1,
|
142 |
top_p=0.75,
|