# Copied/adapted from the original Dockerfile here: https://github.com/NeotomaDB/MetaExtractor/tree/main/docker/data-review-tool

# Base image for the build and the running app.
FROM python:3.10

# Works around the Spaces write-permission issue: create an account named
# "user" with UID 1000 and a home directory.
# (The reference link for this workaround is truncated in this copy of the file.)
RUN useradd --create-home --uid 1000 user

# Left commented out at this point in the file.
# USER user

# Point HOME at the new account's home directory and put its per-user bin
# directory first on PATH.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Subsequent relative paths resolve against /home/user/app.
WORKDIR $HOME/app

# The application source is cloned at build time; copying it from the build
# context is left disabled.
# COPY --chown=user . $HOME/app

# Clone into ./MetaExtractor, the directory every later step refers to.
# NOTE(review): the clone URL was truncated to "https:" in this copy of the file;
# it is restored here from the upstream repository named in the file's header
# comment - confirm it is the intended repository/branch.
RUN git clone https://github.com/NeotomaDB/MetaExtractor.git ./MetaExtractor

# Patch the cloned sources in place:
#   - app.py: change the Dash port from the default 8050 to 7860 to match the
#     HF Spaces requirements (the reference link is truncated in this copy).
#   - logs.py: comment out the file log handler because of file-creation
#     permission issues.
#   - app.py, pages/home.py, pages/article_review.py: rewrite
#     "/entity_extraction" as "./entity_extraction".
#   - app.py: rewrite "/MetaExtractor/inputs/" as "./inputs/".
#
# The last command must NOT end with a backslash: Docker drops comment lines
# inside a continued instruction, so a dangling continuation followed only by
# comments would pull the next instruction into this shell command.
RUN sed -i "s|port=8050|port=7860 |g" ./MetaExtractor/src/data_review_tool/app.py \
    && sed -i "s|logger.addHandler(get_file_handler())|#logger.addHandler(get_file_handler())|g" ./MetaExtractor/src/logs.py \
    && sed -i 's|/entity_extraction|./entity_extraction|g' ./MetaExtractor/src/data_review_tool/app.py \
    && sed -i 's|/entity_extraction|./entity_extraction|g' ./MetaExtractor/src/data_review_tool/pages/home.py \
    && sed -i 's|/entity_extraction|./entity_extraction|g' ./MetaExtractor/src/data_review_tool/pages/article_review.py \
    && sed -i "s|/MetaExtractor/inputs/|./inputs/|g" ./MetaExtractor/src/data_review_tool/app.py

# Disabled: turn off debug mode in app.py.
# RUN sed -i 's|debug=True|debug=False|g' ./MetaExtractor/src/data_review_tool/app.py

# Hand the app folder to owner root / group "user" and set mode 775 on the
# whole tree (read/write/execute for owner and group, read/execute for others).
RUN chown --recursive root:user ./MetaExtractor/ \
    && chmod --recursive 775 ./MetaExtractor/

# Copying requirements.txt from the build context is left disabled; the
# requirements file is read from inside the MetaExtractor directory instead.
# COPY ./requirements.txt .

# Install the Python dependencies (the --no-cache-dir flag was removed here).
RUN pip install --requirement MetaExtractor/docker/data-review-tool/requirements.txt

# Run everything that follows from the repository root. The path is absolute
# so it does not depend on whichever WORKDIR was active before this line; with
# HOME=/home/user it resolves to /home/user/app/MetaExtractor.
WORKDIR $HOME/app/MetaExtractor

# Log level passed to the app through the environment.
ENV LOG_LEVEL=DEBUG

# Port the app is patched to listen on (7860 instead of the default 8050).
EXPOSE 7860

# Extra package installed on top of the requirements file.
RUN pip install pyarrow

# Default environment variables, set to the sample file names in the repo.
ENV ARTICLE_RELEVANCE_BATCH=article-relevance-output.parquet \
    ENTITY_EXTRACTION_BATCH=entity-extraction-output.zip

# Instead of volume mapping, copy the sample files into the expected input
# folder of the image.
RUN mkdir -p ./inputs \
    && cp ./data/data-review-tool/article-relevance-output.parquet \
          ./data/data-review-tool/entity-extraction-output.zip \
          ./inputs

# Switch to the unprivileged account to run the app.
USER user

# Exec (JSON-array) form: python is started directly instead of through
# `/bin/sh -c`, so it receives stop signals itself and arguments given at
# `docker run` are passed through to it. The script path is relative to the
# current WORKDIR.
ENTRYPOINT ["python", "src/data_review_tool/app.py"]