# Image for the MetaExtractor data review tool (a Dash app), set up to run on
# Hugging Face Spaces.
# Copied/adapted from the original Dockerfile here:
# https://github.com/NeotomaDB/MetaExtractor/tree/main/docker/data-review-tool

# The full (non-slim) image is used on purpose: the build needs `git`.
FROM python:3.10

# Fetch the application source. A shallow clone is enough because only the
# current working tree is used, and it keeps the layer small.
# NOTE(review): the clone tracks the default branch head, so builds are not
# reproducible - consider pinning a tag or commit.
RUN git clone --depth 1 https://github.com/NeotomaDB/MetaExtractor.git /MetaExtractor

# All following relative paths resolve against the cloned repository.
WORKDIR /MetaExtractor

# Install the Python dependencies. The requirements file comes from the cloned
# repository, so no COPY from the build context is needed. pyarrow is installed
# in addition to the requirements file (presumably for reading the .parquet
# input - confirm).
RUN pip install --no-cache-dir \
        -r docker/data-review-tool/requirements.txt \
        pyarrow

ENV LOG_LEVEL=DEBUG

# The app listens on 8050 by default; Hugging Face Spaces expects 7860, so the
# port is rewritten in app.py at build time.
# Inspired by: https://stackoverflow.com/questions/27713362/editing-files-from-dockerfile
RUN sed -i "s|port=8050|port=7860|g" ./src/data_review_tool/app.py

# Expose the port the Dash app is running on (documentation only).
EXPOSE 7860

# Default env variables pointing at the sample files shipped in the repo.
ENV ARTICLE_RELEVANCE_BATCH=article-relevance-output.parquet \
    ENTITY_EXTRACTION_BATCH=entity-extraction-output.zip

# Instead of volume mapping, copy the sample files into the expected input
# folder of the image.
RUN mkdir -p ./inputs \
    && cp ./data/data-review-tool/article-relevance-output.parquet ./inputs \
    && cp ./data/data-review-tool/entity-extraction-output.zip ./inputs

# NOTE(review): no USER is set, so the image runs as root unless the runtime
# overrides it - consider adding a non-root user once the paths the app writes
# to are known.

# Exec form so the Python process is PID 1 and receives stop signals directly.
ENTRYPOINT ["python", "src/data_review_tool/app.py"]