Update deploy.json
Browse files
- deploy.json +8 -10
deploy.json
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
{
|
2 |
"parameters": {
|
3 |
-
"max_total_tokens": 2048
|
4 |
-
"max_input_length": 1024
|
5 |
-
"max_batch_total_tokens": 8192
|
6 |
-
"max_concurrent_requests": 1
|
7 |
-
"
|
8 |
-
"
|
9 |
-
"max_batch_size": 1,
|
10 |
-
"waiting_served_ratio": 1.2
|
11 |
},
|
12 |
"hardware": {
|
13 |
"task_type": "text-generation",
|
@@ -17,7 +15,7 @@
|
|
17 |
"distributed_setup": false
|
18 |
},
|
19 |
"framework_type": "pytorch",
|
20 |
-
"torch_compile":
|
21 |
"trust_remote_code": true,
|
22 |
-
"disable_custom_kernels":
|
23 |
}
|
|
|
1 |
{
|
2 |
"parameters": {
|
3 |
+
"max_total_tokens": 4096, // Increase from 2048
|
4 |
+
"max_input_length": 2048, // Increase from 1024
|
5 |
+
"max_batch_total_tokens": 16384, // Increase from 8192
|
6 |
+
"max_concurrent_requests": 2, // Increase from 1
|
7 |
+
"max_batch_size": 2, // Increase from 1
|
8 |
+
"waiting_served_ratio": 0.8 // Decrease from 1.2
|
|
|
|
|
9 |
},
|
10 |
"hardware": {
|
11 |
"task_type": "text-generation",
|
|
|
15 |
"distributed_setup": false
|
16 |
},
|
17 |
"framework_type": "pytorch",
|
18 |
+
"torch_compile": true,
|
19 |
"trust_remote_code": true,
|
20 |
+
"disable_custom_kernels": false
|
21 |
}
|