# Image for the MetaExtractor data review tool (Dash app), adapted to run on
# Hugging Face Spaces.
# Copied/adapted from the original Dockerfile here:
# https://github.com/NeotomaDB/MetaExtractor/tree/main/docker/data-review-tool
FROM python:3.10

# Resolve the Spaces write-permissions issue; inspiration from:
# https://discuss.huggingface.co/t/permission-denied-for-writing-files-within-spaces/29799
RUN useradd -m -u 1000 user
# USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# COPY --chown=user . $HOME/app
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may pick
# up upstream changes; consider pinning for reproducible builds.
RUN git clone https://github.com/NeotomaDB/MetaExtractor.git

# Patch the cloned sources in place.
# - The Dash port is changed from the default 8050 to 7860 to match the
#   HF Spaces requirements (the port is edited in app.py).
#   Inspired by: https://stackoverflow.com/questions/27713362/editing-files-from-dockerfile
# - The file-handler line in logs.py is commented out due to file-creation
#   permission issues.
# - Absolute /entity_extraction and /MetaExtractor paths are rewritten to
#   locations inside the cloned repository.
# - Dash debug mode is switched off.
# The order of the expressions matters: later substitutions operate on the
# output of earlier ones for the same file.
RUN sed -i "s|port=8050|port=7860 |g" ./MetaExtractor/src/data_review_tool/app.py \
    && sed -i "s|logger.addHandler(get_file_handler())|#logger.addHandler(get_file_handler())|g" ./MetaExtractor/src/logs.py \
    && sed -i 's|/entity_extraction|./entity_extraction|g' ./MetaExtractor/src/data_review_tool/app.py \
    && sed -i 's|/entity_extraction|./entity_extraction|g' ./MetaExtractor/src/data_review_tool/pages/home.py \
    && sed -i 's|/MetaExtractor|.|g' ./MetaExtractor/src/data_review_tool/pages/home.py \
    && sed -i 's|/MetaExtractor|.|g' ./MetaExtractor/src/data_review_tool/pages/article_review.py \
    && sed -i 's|/entity_extraction|/home/user/app/MetaExtractor/entity_extraction|g' ./MetaExtractor/src/data_review_tool/pages/article_review.py \
    && sed -i "s|/MetaExtractor/inputs/|./inputs/|g" ./MetaExtractor/src/data_review_tool/app.py \
    && sed -i 's|debug=True|debug=False|g' ./MetaExtractor/src/data_review_tool/app.py

# Install the Python dependencies (--no-cache-dir intentionally removed).
# RUN pip install -r MetaExtractor/docker/data-review-tool/requirements.txt
# Copy the requirements.txt from this repo (the build context) into the image.
COPY ./requirements.txt ./requirements.txt
RUN pip install -r requirements.txt

# Absolute path (was the relative "MetaExtractor/"); resolves to the same
# directory, /home/user/app/MetaExtractor.
WORKDIR $HOME/app/MetaExtractor

ENV LOG_LEVEL=DEBUG

# Documents the port the patched app.py listens on.
EXPOSE 7860

# NOTE(review): pyarrow is installed unpinned; consider pinning a version.
RUN pip install pyarrow

RUN mkdir -p ./inputs

# Set default env variables to the sample files shipped in the repo.
ENV ARTICLE_RELEVANCE_BATCH=article-relevance-output.parquet
ENV ENTITY_EXTRACTION_BATCH=entity-extraction-output.zip

# Instead of volume mapping, copy the sample files to the expected input folder
# in the regular image.
RUN cp ./data/data-review-tool/article-relevance-output.parquet ./inputs \
    && cp ./data/data-review-tool/entity-extraction-output.zip ./inputs

# Change ownership of the app folder to the user's group and enable
# read/write/execute permissions for owner and group.
RUN chown -R root:user ../MetaExtractor/ \
    && chmod -R 775 ../MetaExtractor/

# Switch to the unprivileged user to run the scripts.
USER user

# Exec form so the Python process is PID 1 and receives SIGTERM from
# `docker stop` directly (shell form would wrap it in /bin/sh -c).
ENTRYPOINT ["python", "src/data_review_tool/app.py"]