Sean-Case committed
Commit 3034296
Parent(s): 55b0541

Now loads in embedding model locally in Dockerfile
Files changed:
- Dockerfile (+14 -1)
- app.py (+4 -1)
- search_funcs/semantic_functions.py (+20 -3)
Dockerfile
CHANGED

```diff
@@ -1,11 +1,24 @@
 # First stage: build dependencies
 FROM public.ecr.aws/docker/library/python:3.10.13-slim AS build
 
+# Install wget
+RUN apt-get update && apt-get install -y wget
+
+# Create a directory for the model
+RUN mkdir /model
+
 WORKDIR /src
 
 COPY requirements.txt .
 
-RUN pip install
+RUN pip install -r requirements.txt
+
+# Download the model during the build process
+RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
+RUN apt-get install git-lfs -y
+RUN git lfs install
+RUN git clone https://huggingface.co/BAAI/bge-small-en-v1.5 /model/bge
+RUN rm -rf /model/bge/.git
 
 # Second stage: final image
 FROM build AS final
```
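Baking the model into the image this way means the Space can load `BAAI/bge-small-en-v1.5` from `/model/bge` at runtime without a network call to Hugging Face. A minimal smoke-test sketch for the baked-in copy, assuming `sentence-transformers` is listed in `requirements.txt` (this test script is not part of the commit):

```python
from sentence_transformers import SentenceTransformer

# Load the embedding model from the path created during the Docker build
model = SentenceTransformer("/model/bge")

# bge-small-en-v1.5 produces 384-dimensional sentence embeddings
vector = model.encode("a quick smoke test sentence")
print(vector.shape)  # expected: (384,)
```

Deleting `/model/bge/.git` after the clone drops the duplicate copies of the LFS objects that git keeps under `.git/lfs`, roughly halving the space the model adds to the image.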
app.py
CHANGED

```diff
@@ -190,7 +190,10 @@ depends on factors such as the type of documents or queries. Information taken f
     semantic_query.submit(bge_simple_retrieval, inputs=[semantic_query, vectorstore_state, ingest_docs, in_semantic_column, k_val, out_passages, semantic_min_distance, vec_weight, join_data_state, in_join_column, search_df_join_column], outputs=[semantic_output_single_text, semantic_output_file])
 
 # Simple run for HF spaces or local on your computer
-block.queue().launch(debug=True)
+#block.queue().launch(debug=True)
+
+# Running on local server without specifying port
+block.queue().launch(server_name="0.0.0.0")
 
 # Running on local server without https
 #block.queue().launch(server_name="0.0.0.0", server_port=7861, ssl_verify=False)
```
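Inside a container the app must bind to `0.0.0.0` rather than Gradio's default of `127.0.0.1`, or the platform's proxy cannot reach it; omitting `server_port` lets Gradio fall back to its default port (7860), which is what Hugging Face Spaces expects. A stripped-down sketch of the same launch pattern, with a placeholder UI standing in for the real app:

```python
import gradio as gr

# Placeholder interface standing in for the app's actual Blocks layout
with gr.Blocks() as block:
    gr.Markdown("Demo placeholder")

# Bind to all interfaces so the container's mapped port is reachable;
# the default of 127.0.0.1 is not visible outside the container.
block.queue().launch(server_name="0.0.0.0")
```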
search_funcs/semantic_functions.py
CHANGED

```diff
@@ -43,10 +43,27 @@ PandasDataFrame = Type[pd.DataFrame]
 
 # Load embeddings
 embeddings_name = "BAAI/bge-small-en-v1.5"
-local_embeddings_location = "model/bge/"
 
-#
-
+# Define a list of possible local locations to search for the model
+local_embeddings_locations = [
+    "model/bge/", # Potential local location
+    "/model/bge/", # Potential location in Docker container
+    "/home/user/app/model/bge/" # This is inside a Docker container
+]
+
+# Attempt to load the model from each local location
+for location in local_embeddings_locations:
+    try:
+        embeddings_model = SentenceTransformer(location)
+        print(f"Found local model installation at: {location}")
+        break # Exit the loop if the model is found
+    except Exception as e:
+        print(f"Failed to load model from {location}: {e}")
+        continue
+else:
+    # If the loop completes without finding the model in any local location
+    embeddings_model = SentenceTransformer(embeddings_name)
+    print("Could not find local model installation. Downloading from Huggingface")
 
 def docs_to_bge_embed_np_array(docs_out, in_file, embeddings_state, output_file_state, clean, return_intermediate_files = "No", embeddings_super_compress = "No", embeddings_model = embeddings_model, progress=gr.Progress(track_tqdm=True)):
     '''
```
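The fallback hinges on Python's `for`/`else` construct: the `else` block runs only when the loop finishes without hitting `break`, i.e. only when every candidate path fails to load, and in that case the model is downloaded from Hugging Face by name instead. A toy illustration of the same control flow (the paths and helper are made up for the example):

```python
# for/else in miniature: the else branch runs only if the loop never breaks
def load_from_first_available(paths):
    for path in paths:
        try:
            resource = open(path)  # stand-in for SentenceTransformer(location)
            resource.close()
        except OSError as e:
            print(f"Failed to load from {path}: {e}")
            continue  # try the next candidate path
        print(f"Found local copy at: {path}")
        break  # stop searching once one loads
    else:
        # reached only when all candidates failed
        print("No local copy found; falling back to remote download")

load_from_first_available(["/no/such/file", "/also/missing"])
```

The `continue` at the end of the commit's `except` block is technically redundant (the loop would advance anyway), but it makes the intent of "try the next location" explicit.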