adaptiveaiventures committed on
Commit
1b4a88d
·
verified ·
1 Parent(s): b6df890

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +3 -3
Dockerfile CHANGED
@@ -1,7 +1,7 @@
1
  FROM ghcr.io/huggingface/text-generation-inference:latest
2
 
3
- # Define the model to use
4
  ENV MODEL_ID="adaptiveaiventures/Llama-2-7b-chat-finetune"
5
 
6
- # Set the number of GPU shards (1 if using CPU, 2+ if using multiple GPUs)
7
- CMD ["--model-id", "adaptiveaiventures/Llama-2-7b-chat-finetune", "--port", "8080", "--num-shard", "1"]
 
1
  FROM ghcr.io/huggingface/text-generation-inference:latest
2
 
3
+ # Define Model
4
  ENV MODEL_ID="adaptiveaiventures/Llama-2-7b-chat-finetune"
5
 
6
+ # Run the TGI server. Exec-form CMD is not shell-expanded, so "${MODEL_ID}" would reach the launcher as a literal string; --model-id is omitted because the launcher reads MODEL_ID from the environment set by ENV.
7
+ CMD ["--port", "8080", "--num-shard", "1", "--dtype", "bfloat16", "--max-batch-prefill-tokens", "1024", "--disable-custom-kernels"]