# Image for a Streamlit application that needs Python 3.11, OpenJDK 17 and
# PySpark. The app runs as the unprivileged user "jovyan" and listens on 7860.

# Ubuntu 22.04 LTS base image (pinned so builds stay reproducible)
FROM ubuntu:22.04

# Build-time only: keep apt/debconf from waiting for interactive input.
# Declared as ARG so the setting does not leak into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Set environment variables
ENV NB_USER=jovyan
ENV NB_UID=1000
ENV HOME=/home/${NB_USER}
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/

# Upgrade the system and install required packages.
# The ZeroMQ development headers are packaged as libzmq3-dev on Ubuntu
# (there is no libzmq5-dev package). The apt package lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update && apt-get dist-upgrade -y && apt-get install -y \
    software-properties-common \
    wget \
    curl \
    tar \
    bash \
    rsync \
    gcc \
    libfreetype6-dev \
    libhdf5-dev \
    libpng-dev \
    libzmq3-dev \
    unzip \
    pkg-config \
    graphviz \
    openjdk-17-jdk \
    python3.11 \
    python3.11-dev \
    python3.11-venv \
    python3-pip \
    ant \
    ca-certificates \
    && apt-get clean \
    && update-ca-certificates -f \
    && rm -rf /var/lib/apt/lists/*

# Also export JAVA_HOME for login shells (ENV above covers non-login processes)
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/" >> /etc/profile \
    && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile

# Upgrade Python tools (pip, setuptools, wheel)
RUN python3.11 -m pip install --upgrade pip setuptools wheel

# Create a new user with a specific UID (-m also creates the home directory)
RUN useradd -m -u ${NB_UID} ${NB_USER}

# Switch to the new user
USER ${NB_USER}

# Make console scripts installed into the user's site-packages (e.g. streamlit)
# available on PATH
ENV PATH=/home/${NB_USER}/.local/bin:$PATH

# Set up PySpark to use Python 3.11 for both driver and workers
ENV PYSPARK_PYTHON=/usr/bin/python3.11
ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.11

# Install the Python dependencies from requirements.txt.
# Copied on its own first so this layer is rebuilt only when the
# requirements change, not on every source edit.
COPY requirements.txt /tmp/requirements.txt
RUN python3.11 -m pip install --upgrade -r /tmp/requirements.txt

# Run from the user's home directory: the application code is copied there,
# and the entry point refers to app.py by a relative path.
WORKDIR ${HOME}

# Copy application code to the container
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}

# Expose port for Streamlit
EXPOSE 7860

# Define the entry point for the container
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]