Spaces:

adaptiveaiventures
/

llama2-interference

Runtime error

adaptiveaiventures committed on Jan 18

Commit

bc65857

verified ·

1 Parent(s): 7559f23

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,14 +1,12 @@
 FROM ghcr.io/huggingface/text-generation-inference:latest
-USER root  # Run container as root
-# Define Model
-ENV MODEL_ID="adaptiveaiventures/Llama-2-7b-chat-finetune"
-# Set cache directory to a writable location
 ENV HF_HOME="/tmp"
 ENV TRANSFORMERS_CACHE="/tmp"
 ENV HUGGINGFACE_HUB_CACHE="/tmp"
 # Run the TGI server
-CMD ["--model-id", "adaptiveaiventures/Llama-32-1B-finetuned-aaivtest5", "--port", "8080", "--num-shard", "1", "--dtype", "bfloat16", "--max-batch-prefill-tokens", "512", "--disable-custom-kernels"]

 FROM ghcr.io/huggingface/text-generation-inference:latest
+# Define a pre-existing Hugging Face model
+ENV MODEL_ID="gpt2"
+# Use a writable cache directory
 ENV HF_HOME="/tmp"
 ENV TRANSFORMERS_CACHE="/tmp"
 ENV HUGGINGFACE_HUB_CACHE="/tmp"
 # Run the TGI server
+CMD ["--model-id", "gpt2", "--port", "8080", "--num-shard", "1"]