File size: 2,808 Bytes
c69ef82
 
 
 
53c2b9d
 
 
 
 
 
 
 
 
 
c69ef82
 
53c2b9d
 
 
 
 
 
ad2ce19
 
 
fd26066
 
 
 
 
c69ef82
ad2ce19
10e318a
 
 
 
c69ef82
 
 
 
 
 
 
 
 
 
 
 
ecff0e4
c69ef82
 
 
 
 
 
fd26066
 
 
 
53c2b9d
 
 
c69ef82
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Copied/adapted from the original Dockerfile here: https://github.com/NeotomaDB/MetaExtractor/tree/main/docker/data-review-tool

FROM python:3.10

# Create a dedicated account (UID 1000) to resolve the write-permission issue on
# Hugging Face Spaces; inspiration:
# https://discuss.huggingface.co/t/permission-denied-for-writing-files-within-spaces/29799
RUN useradd --create-home --uid 1000 user
# USER user

# Point HOME at the new account's home directory and put its local bin first on PATH.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Relative paths in the following instructions resolve against this directory.
WORKDIR ${HOME}/app

# COPY --chown=user . $HOME/app

# Fetch the upstream sources into ./MetaExtractor (relative to the current WORKDIR).
# A shallow clone (--depth 1) keeps the commit history out of the image layer;
# the later steps in this file only use the checked-out working tree.
RUN git clone --depth 1 https://github.com/NeotomaDB/MetaExtractor.git

# Patch the cloned sources in place (approach inspired by
# https://stackoverflow.com/questions/27713362/editing-files-from-dockerfile):
#   * app.py: port changed from the default 8050 to 7860 to match the HF Spaces
#     requirements, debug mode switched off, and the /entity_extraction and
#     /MetaExtractor/inputs/ path prefixes made relative.
#   * logs.py: the file-handler line is commented out because of file creation
#     permission issues.
#   * pages/home.py and pages/article_review.py: /MetaExtractor and
#     /entity_extraction path prefixes rewritten.
# Expressions are grouped per file; their order within each file matters because
# later expressions see the output of earlier ones.
RUN sed -i \
        -e 's|port=8050|port=7860 |g' \
        -e 's|/entity_extraction|./entity_extraction|g' \
        -e 's|/MetaExtractor/inputs/|./inputs/|g' \
        -e 's|debug=True|debug=False|g' \
        ./MetaExtractor/src/data_review_tool/app.py \
    && sed -i \
        -e 's|logger.addHandler(get_file_handler())|#logger.addHandler(get_file_handler())|g' \
        ./MetaExtractor/src/logs.py \
    && sed -i \
        -e 's|/entity_extraction|./entity_extraction|g' \
        -e 's|/MetaExtractor|.|g' \
        ./MetaExtractor/src/data_review_tool/pages/home.py \
    && sed -i \
        -e 's|/MetaExtractor|.|g' \
        -e 's|/entity_extraction|/home/user/app/MetaExtractor/entity_extraction|g' \
        ./MetaExtractor/src/data_review_tool/pages/article_review.py

# Install the Python dependencies (the --no-cache-dir flag was removed).
# The upstream requirements file is not used:
# RUN pip install -r MetaExtractor/docker/data-review-tool/requirements.txt
# Instead, the requirements.txt from this repo's build context is copied into the image.
COPY requirements.txt ./
RUN pip install --requirement requirements.txt

# Work from the cloned repository. The path is absolute (built from $HOME, set
# earlier in this file) so it does not silently depend on the previous WORKDIR.
WORKDIR $HOME/app/MetaExtractor

# Extra package installed on top of requirements.txt.
RUN pip install pyarrow

# Log level exported to the container environment
# (presumably read by the app's logging setup; confirm against src/logs.py).
ENV LOG_LEVEL=DEBUG

# Port 7860 is the port app.py is patched to use earlier in this file.
EXPOSE 7860/tcp

# Set default env variables to the sample files in the repo.
ENV ARTICLE_RELEVANCE_BATCH=article-relevance-output.parquet \
    ENTITY_EXTRACTION_BATCH=entity-extraction-output.zip

# Instead of volume mapping, copy the sample files to the expected input folder
# inside the image.
RUN mkdir -p ./inputs \
    && cp \
        ./data/data-review-tool/article-relevance-output.parquet \
        ./data/data-review-tool/entity-extraction-output.zip \
        ./inputs

# Give group "user" ownership of the app folder (owner stays root) and enable
# read/write/execute for owner and group, read/execute for everyone else.
# The current WORKDIR is the MetaExtractor folder, so "." targets it directly.
RUN chown -R root:user . \
    && chmod -R 775 .

# Switch to the unprivileged user to run the scripts.
USER user

# Exec (JSON-array) form: python is started directly rather than through
# `/bin/sh -c`, so no wrapper shell sits between the container runtime and the
# app, and arguments given at `docker run` are passed on to the script.
# The script path is relative to the current WORKDIR.
ENTRYPOINT ["python", "src/data_review_tool/app.py"]