FROM ghcr.io/huggingface/text-generation-inference:latest

# Define a pre-existing Hugging Face model
ENV MODEL_ID="gpt2"

# Use a writable cache directory
ENV HF_HOME="/tmp"
ENV TRANSFORMERS_CACHE="/tmp"
ENV HUGGINGFACE_HUB_CACHE="/tmp"

# Run the TGI server
CMD ["--model-id", "gpt2", "--port", "8080", "--num-shard", "1"]
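
Once the image is built and the container is running with port 8080 published (for example, `docker build -t tgi-gpt2 .` followed by `docker run -p 8080:8080 tgi-gpt2`, where `tgi-gpt2` is just an illustrative tag), you can query the server over TGI's `/generate` HTTP endpoint. The sketch below is a minimal Python client under those assumptions; the prompt and `max_new_tokens` value are arbitrary placeholders.

```python
# Minimal sketch of a client for the TGI container above.
# Assumes the server is reachable at localhost:8080 (the --port set in CMD).
import requests

response = requests.post(
    "http://localhost:8080/generate",
    json={
        "inputs": "What is Deep Learning?",          # example prompt
        "parameters": {"max_new_tokens": 20},        # example generation setting
    },
    headers={"Content-Type": "application/json"},
    timeout=60,
)
response.raise_for_status()

# TGI's /generate endpoint returns a JSON object with a "generated_text" field.
print(response.json()["generated_text"])
```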