Spaces:

adaptiveaiventures
/

llama2-interference

Runtime error

adaptiveaiventures committed on Jan 18

Commit

ba00a28

verified ·

1 Parent(s): b2c1ee4

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -3,5 +3,10 @@ FROM ghcr.io/huggingface/text-generation-inference:latest
 # Define Model
 ENV MODEL_ID="adaptiveaiventures/Llama-2-7b-chat-finetune"
 # Run the TGI server
 CMD ["--model-id", "adaptiveaiventures/Llama-2-7b-chat-finetune", "--port", "8080", "--num-shard", "1", "--dtype", "bfloat16", "--max-batch-prefill-tokens", "1024", "--disable-custom-kernels"]

 # Define Model
 ENV MODEL_ID="adaptiveaiventures/Llama-2-7b-chat-finetune"
+# Set cache directory to a writable location
+ENV HF_HOME="/tmp"
+ENV TRANSFORMERS_CACHE="/tmp"
+ENV HUGGINGFACE_HUB_CACHE="/tmp"
 # Run the TGI server
 CMD ["--model-id", "adaptiveaiventures/Llama-2-7b-chat-finetune", "--port", "8080", "--num-shard", "1", "--dtype", "bfloat16", "--max-batch-prefill-tokens", "1024", "--disable-custom-kernels"]