Spaces:

retopara/ragflow

Build error

Kevin Hu committed on Sep 25, 2024

Commit

d55a6be

1 Parent(s): 613b83b

make it lighten (#2577)

### What problem does this PR solve?

#2295

### Type of change

- [x] Refactoring

Files changed (4) hide show

Dockerfile.scratch CHANGED Viewed

@@ -14,7 +14,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
 RUN  sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    apt update && apt install -y curl libpython3-dev nginx openmpi-bin openmpi-common libopenmpi-dev libglib2.0-0 libglx-mesa0 \
     && rm -rf /var/lib/apt/lists/* \
     && curl -sSL https://install.python-poetry.org | python3 -
@@ -33,7 +33,7 @@ USER root
 WORKDIR /ragflow
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    apt update && apt install -y nodejs npm cargo && \
     rm -rf /var/lib/apt/lists/*
 COPY web web
@@ -42,7 +42,10 @@ RUN cd web && npm i --force && npm run build
 # install dependencies from poetry.lock file
 COPY pyproject.toml poetry.toml poetry.lock ./
 RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
-    /root/.local/bin/poetry install --sync --no-root
 # production stage
 FROM base AS production
@@ -76,8 +79,6 @@ ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
 # Download nltk data
 RUN python3 -m nltk.downloader wordnet punkt punkt_tab
-# Copy models downloaded via download_deps.sh
-COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/
 ENV PYTHONPATH=/ragflow/

 RUN  sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 \
     && rm -rf /var/lib/apt/lists/* \
     && curl -sSL https://install.python-poetry.org | python3 -
 WORKDIR /ragflow
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt update && apt install -y nodejs npm && \
     rm -rf /var/lib/apt/lists/*
 COPY web web
 # install dependencies from poetry.lock file
 COPY pyproject.toml poetry.toml poetry.lock ./
 RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
+    /root/.local/bin/poetry lock
+RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
+    /root/.local/bin/poetry install --sync --no-cache --no-root
 # production stage
 FROM base AS production
 # Download nltk data
 RUN python3 -m nltk.downloader wordnet punkt punkt_tab
 ENV PYTHONPATH=/ragflow/

pyproject.toml CHANGED Viewed

@@ -99,7 +99,7 @@ tika = "2.6.0"
 tiktoken = "0.6.0"
 torch = "2.3.0"
 transformers = "4.38.1"
-umap = "0.1.1"
 vertexai = "1.64.0"
 volcengine = "1.0.146"
 voyageai = "0.2.3"

 tiktoken = "0.6.0"
 torch = "2.3.0"
 transformers = "4.38.1"
+umap_learn = "0.5.6"
 vertexai = "1.64.0"
 volcengine = "1.0.146"
 voyageai = "0.2.3"

rag/llm/embedding_model.py CHANGED Viewed

@@ -245,8 +245,8 @@ class FastEmbed(Base):
             threads: Optional[int] = None,
             **kwargs,
     ):
-        from fastembed import TextEmbedding
-        if not FastEmbed._model:
             self._model = TextEmbedding(model_name, cache_dir, threads, **kwargs)
     def encode(self, texts: list, batch_size=32):
@@ -291,8 +291,8 @@ class YoudaoEmbed(Base):
     _client = None
     def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs):
-        from BCEmbedding import EmbeddingModel as qanthing
-        if not YoudaoEmbed._client:
             try:
                 print("LOADING BCE...")
                 YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join(

             threads: Optional[int] = None,
             **kwargs,
     ):
+        if not LIGHTEN and not FastEmbed._model:
+            from fastembed import TextEmbedding
             self._model = TextEmbedding(model_name, cache_dir, threads, **kwargs)
     def encode(self, texts: list, batch_size=32):
     _client = None
     def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs):
+        if not LIGHTEN and not YoudaoEmbed._client:
+            from BCEmbedding import EmbeddingModel as qanthing
             try:
                 print("LOADING BCE...")
                 YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join(

rag/llm/rerank_model.py CHANGED Viewed

@@ -109,8 +109,8 @@ class YoudaoRerank(DefaultRerank):
     _model_lock = threading.Lock()
     def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs):
-        from BCEmbedding import RerankerModel
-        if not YoudaoRerank._model:
             with YoudaoRerank._model_lock:
                 if not YoudaoRerank._model:
                     try:

     _model_lock = threading.Lock()
     def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs):
+        if not LIGHTEN and not YoudaoRerank._model:
+            from BCEmbedding import RerankerModel
             with YoudaoRerank._model_lock:
                 if not YoudaoRerank._model:
                     try: