Sean-Case committed
Commit 3034296 · 1 Parent(s): 55b0541

Now loads in the embedding model locally in the Dockerfile

Files changed (3):
  1. Dockerfile +14 -1
  2. app.py +4 -1
  3. search_funcs/semantic_functions.py +20 -3
Dockerfile CHANGED
@@ -1,11 +1,24 @@
 # First stage: build dependencies
 FROM public.ecr.aws/docker/library/python:3.10.13-slim AS build
 
+# Install wget
+RUN apt-get update && apt-get install -y wget
+
+# Create a directory for the model
+RUN mkdir /model
+
 WORKDIR /src
 
 COPY requirements.txt .
 
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install -r requirements.txt
+
+# Download the model during the build process
+RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
+RUN apt-get install git-lfs -y
+RUN git lfs install
+RUN git clone https://huggingface.co/BAAI/bge-small-en-v1.5 /model/bge
+RUN rm -rf /model/bge/.git
 
 # Second stage: final image
 FROM build AS final
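
Note: the build stage installs wget but fetches the git-lfs setup script with curl, which the python:3.10.13-slim base image does not ship by default, so that RUN step may fail unless curl is installed as well (or the script is fetched with wget instead). A lighter-weight sketch for baking the model into the image, assuming huggingface_hub is available via requirements.txt (sentence-transformers pulls it in as a dependency), would skip git-lfs entirely:

# download_model.py -- sketch only; the filename and build step are
# hypothetical, e.g. `RUN python download_model.py` in place of the
# git-lfs steps above.
from huggingface_hub import snapshot_download

# Fetch the same model the Dockerfile clones, into the same target path.
snapshot_download(repo_id="BAAI/bge-small-en-v1.5", local_dir="/model/bge")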
app.py CHANGED
@@ -190,7 +190,10 @@ depends on factors such as the type of documents or queries. Information taken f
 semantic_query.submit(bge_simple_retrieval, inputs=[semantic_query, vectorstore_state, ingest_docs, in_semantic_column, k_val, out_passages, semantic_min_distance, vec_weight, join_data_state, in_join_column, search_df_join_column], outputs=[semantic_output_single_text, semantic_output_file])
 
 # Simple run for HF spaces or local on your computer
-block.queue().launch(debug=True)
+#block.queue().launch(debug=True)
+
+# Running on local server without specifying port
+block.queue().launch(server_name="0.0.0.0")
 
 # Running on local server without https
 #block.queue().launch(server_name="0.0.0.0", server_port=7861, ssl_verify=False)
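
This launch change is what makes the containerised app reachable: Gradio binds to 127.0.0.1 by default, which cannot be reached from outside a Docker container, while server_name="0.0.0.0" listens on all interfaces. A minimal sketch of the resulting behaviour, assuming Gradio's documented defaults:

# With no server_port given, Gradio falls back to 7860 (or the
# GRADIO_SERVER_PORT environment variable), so the container would be
# started with a matching port mapping, e.g. `docker run -p 7860:7860 ...`.
block.queue().launch(server_name="0.0.0.0")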
search_funcs/semantic_functions.py CHANGED
@@ -43,10 +43,27 @@ PandasDataFrame = Type[pd.DataFrame]
 
 # Load embeddings
 embeddings_name = "BAAI/bge-small-en-v1.5"
-local_embeddings_location = "model/bge/"
 
-# Not using SentenceTransformer here
-embeddings_model = SentenceTransformer(embeddings_name)
+# Define a list of possible local locations to search for the model
+local_embeddings_locations = [
+    "model/bge/", # Potential local location
+    "/model/bge/", # Potential location in Docker container
+    "/home/user/app/model/bge/" # This is inside a Docker container
+]
+
+# Attempt to load the model from each local location
+for location in local_embeddings_locations:
+    try:
+        embeddings_model = SentenceTransformer(location)
+        print(f"Found local model installation at: {location}")
+        break # Exit the loop if the model is found
+    except Exception as e:
+        print(f"Failed to load model from {location}: {e}")
+        continue
+else:
+    # If the loop completes without finding the model in any local location
+    embeddings_model = SentenceTransformer(embeddings_name)
+    print("Could not find local model installation. Downloading from Huggingface")
 
 def docs_to_bge_embed_np_array(docs_out, in_file, embeddings_state, output_file_state, clean, return_intermediate_files = "No", embeddings_super_compress = "No", embeddings_model = embeddings_model, progress=gr.Progress(track_tqdm=True)):
     '''
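
One caveat with the try/except probe: SentenceTransformer treats a path that does not exist as a Hugging Face Hub model ID, so each missing location may trigger a (failing) download attempt before the exception is raised. A hedged alternative sketch, keeping the same candidate paths and fallback but checking for the directory first:

import os
from sentence_transformers import SentenceTransformer

# Sketch only: pick the first candidate directory that actually exists,
# and fall back to the Hub download when no local copy is present.
local_embeddings_locations = ["model/bge/", "/model/bge/", "/home/user/app/model/bge/"]
location = next((path for path in local_embeddings_locations if os.path.isdir(path)), None)
embeddings_model = SentenceTransformer(location or "BAAI/bge-small-en-v1.5")
print(f"Loading embeddings model from: {location or 'Hugging Face Hub'}")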