# Copied/adapted from the original Dockerfile here: https://github.com/NeotomaDB/MetaExtractor/tree/main/docker/data-review-tool
FROM python:3.10

# Branch or tag of MetaExtractor to build from.
# Override with: docker build --build-arg METAEXTRACTOR_REF=<ref> .
ARG METAEXTRACTOR_REF=main

# Fetch the application source. A shallow clone keeps the git history out of
# the image layer; the explicit target path makes the WORKDIR below absolute.
RUN git clone --depth 1 --branch "${METAEXTRACTOR_REF}" \
        https://github.com/NeotomaDB/MetaExtractor.git /MetaExtractor

WORKDIR /MetaExtractor

# Install the Python dependencies shipped with the data review tool, plus
# pyarrow, which this image installs in addition to the requirements file.
RUN pip install --no-cache-dir \
        -r docker/data-review-tool/requirements.txt \
        pyarrow

# Runtime configuration:
#   LOG_LEVEL                 - log verbosity for the app
#   ARTICLE_RELEVANCE_BATCH / ENTITY_EXTRACTION_BATCH
#                             - default to the sample files from the repo
ENV LOG_LEVEL=DEBUG \
    ARTICLE_RELEVANCE_BATCH=article-relevance-output.parquet \
    ENTITY_EXTRACTION_BATCH=entity-extraction-output.zip

# Change the Dash port from the default 8050 to 7860 to match the HF Spaces
# requirements by editing app.py in place.
# Inspired by: https://stackoverflow.com/questions/27713362/editing-files-from-dockerfile
# The grep makes the build fail loudly if the port was not rewritten
# (e.g. because the upstream source no longer contains "port=8050").
RUN sed -i "s|port=8050|port=7860|g" src/data_review_tool/app.py \
    && grep -q "port=7860" src/data_review_tool/app.py

# Port the Dash app listens on (documentation only; does not publish it).
EXPOSE 7860

# Instead of volume mapping, copy the sample files into the expected input
# folder so the plain image works without any mounts.
RUN mkdir -p inputs \
    && cp data/data-review-tool/article-relevance-output.parquet \
          data/data-review-tool/entity-extraction-output.zip \
          inputs/

# Exec form so the Python process is PID 1 and receives stop signals directly.
ENTRYPOINT ["python", "src/data_review_tool/app.py"]