DanielHafezi committed on
Commit
28d1b45
·
verified ·
1 Parent(s): cd04c46

Update deploy.json

Browse files
Files changed (1) hide show
  1. deploy.json +8 -10
deploy.json CHANGED
@@ -1,13 +1,11 @@
1
  {
2
  "parameters": {
3
- "max_total_tokens": 2048,
4
- "max_input_length": 1024,
5
- "max_batch_total_tokens": 8192,
6
- "max_concurrent_requests": 1,
7
- "max_best_of": 1,
8
- "max_stop_sequences": 4,
9
- "max_batch_size": 1,
10
- "waiting_served_ratio": 1.2
11
  },
12
  "hardware": {
13
  "task_type": "text-generation",
@@ -17,7 +15,7 @@
17
  "distributed_setup": false
18
  },
19
  "framework_type": "pytorch",
20
- "torch_compile": false,
21
  "trust_remote_code": true,
22
- "disable_custom_kernels": true
23
  }
 
1
  {
2
  "parameters": {
3
+ "max_total_tokens": 4096,
4
+ "max_input_length": 2048,
5
+ "max_batch_total_tokens": 16384,
6
+ "max_concurrent_requests": 2,
7
+ "max_batch_size": 2,
8
+ "waiting_served_ratio": 0.8
 
 
9
  },
10
  "hardware": {
11
  "task_type": "text-generation",
 
15
  "distributed_setup": false
16
  },
17
  "framework_type": "pytorch",
18
+ "torch_compile": true,
19
  "trust_remote_code": true,
20
+ "disable_custom_kernels": false
21
  }