FROM ghcr.io/huggingface/text-generation-inference:latest

# Define a pre-existing Hugging Face model
ENV MODEL_ID="gpt2"

# Use a writable cache directory
ENV HF_HOME="/tmp"
ENV TRANSFORMERS_CACHE="/tmp"
ENV HUGGINGFACE_HUB_CACHE="/tmp"

# Run the TGI server
CMD ["--model-id", "gpt2", "--port", "8080", "--num-shard", "1"]
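
Once the image is built and the container is running with port 8080 published (for example, `docker build -t tgi-gpt2 .` followed by `docker run -p 8080:8080 tgi-gpt2`, where `tgi-gpt2` is just an illustrative tag), you can query the server over TGI's `/generate` HTTP endpoint. The sketch below is a minimal Python client under those assumptions; the prompt and `max_new_tokens` value are arbitrary placeholders.

```python
# Minimal sketch of a client for the TGI container above.
# Assumes the server is reachable at localhost:8080 (the --port set in CMD).
import requests

response = requests.post(
    "http://localhost:8080/generate",
    json={
        "inputs": "What is Deep Learning?",          # example prompt
        "parameters": {"max_new_tokens": 20},        # example generation setting
    },
    headers={"Content-Type": "application/json"},
    timeout=60,
)
response.raise_for_status()

# TGI's /generate endpoint returns a JSON object with a "generated_text" field.
print(response.json()["generated_text"])
```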