File size: 1,280 Bytes
c69ef82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Copied/adapted from the original Dockerfile here: https://github.com/NeotomaDB/MetaExtractor/tree/main/docker/data-review-tool

FROM python:3.10

# Fetch the application source. A shallow clone keeps the repository history
# out of the image layer. The destination is spelled out as an absolute path;
# NOTE(review): this assumes the base image's working directory is "/", which
# is where the original relative clone would have landed — confirm.
# NOTE(review): the clone follows the default branch and is not pinned to a
# commit or tag, so rebuilds may pick up upstream changes.
RUN git clone --depth 1 https://github.com/NeotomaDB/MetaExtractor.git /MetaExtractor

# Install the Python dependencies directly from the cloned repository
# (no requirements.txt is copied in from the build context).
RUN pip install --no-cache-dir -r /MetaExtractor/docker/data-review-tool/requirements.txt

# Every relative path in the instructions that follow resolves against the
# repository root.
WORKDIR /MetaExtractor

ENV LOG_LEVEL=DEBUG

# The Dash app defaults to port 8050; HF Spaces requires 7860, so the port is
# patched in app.py at build time.
# Technique from: https://stackoverflow.com/questions/27713362/editing-files-from-dockerfile
# sed exits successfully even when nothing matched, so the grep makes the
# build fail if app.py does not end up containing "port=7860" (for example
# because upstream no longer hard-codes "port=8050"), instead of silently
# producing an image that listens on the wrong port.
RUN sed -i "s|port=8050|port=7860|g" ./src/data_review_tool/app.py \
    && grep -q "port=7860" ./src/data_review_tool/app.py
EXPOSE 7860

# pyarrow is installed in addition to the project's requirements file.
# NOTE(review): presumably needed to read the .parquet sample input — confirm,
# and consider pinning a version for reproducible builds.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir pyarrow

# Default the input-file variables to the sample files shipped in the repo.
ENV ARTICLE_RELEVANCE_BATCH=article-relevance-output.parquet \
    ENTITY_EXTRACTION_BATCH=entity-extraction-output.zip

# Instead of volume mapping, copy the sample files into the expected input
# folder so they are part of the regular image.
RUN mkdir -p ./inputs \
    && cp \
        ./data/data-review-tool/article-relevance-output.parquet \
        ./data/data-review-tool/entity-extraction-output.zip \
        ./inputs

# Exec (JSON-array) form starts python directly rather than through
# `/bin/sh -c`, so the app process itself receives the signals sent by
# `docker stop`. The script path is relative to the image's WORKDIR.
ENTRYPOINT ["python", "src/data_review_tool/app.py"]