# llama2-interference / Dockerfile
# adaptiveaiventures's picture
# Update Dockerfile
# bc65857 verified
# raw
# history blame contribute delete
# 325 Bytes
# Base image: Hugging Face Text Generation Inference (TGI).
# The tag is pinned rather than tracking ":latest" so that rebuilds are
# reproducible; bump it deliberately, or override it at build time with
#   docker build --build-arg TGI_VERSION=<tag> .
ARG TGI_VERSION=3.3.5
FROM ghcr.io/huggingface/text-generation-inference:${TGI_VERSION}

# Runtime configuration.
# - MODEL_ID: the pre-existing Hugging Face model to serve. The TGI launcher
#   reads this variable from the environment, so it is the single place the
#   model is chosen (it can be overridden with `docker run -e MODEL_ID=...`).
# - HF_HOME / TRANSFORMERS_CACHE / HUGGINGFACE_HUB_CACHE: all point at /tmp so
#   that model downloads land in a writable cache directory.
ENV MODEL_ID="gpt2" \
    HF_HOME="/tmp" \
    TRANSFORMERS_CACHE="/tmp" \
    HUGGINGFACE_HUB_CACHE="/tmp"

# Documentation only: the port the server is told to listen on below.
EXPOSE 8080

# Default arguments for the TGI server (exec form, appended to the base
# image's entrypoint). "--model-id" is intentionally not repeated here:
# passing it on the command line would shadow the MODEL_ID variable above.
CMD ["--port", "8080", "--num-shard", "1"]