itod committed on
Commit
72b0049
·
1 Parent(s): 2de7320

Initial commit

Browse files
Files changed (6) hide show
  1. .gitignore +3 -0
  2. Dockerfile +39 -0
  3. app/__init__.py +0 -0
  4. app/app.py +68 -0
  5. requirements.txt +9 -0
  6. start.sh +13 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv/
2
+ models/
3
+ app/__pycache__/
Dockerfile ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.8-slim-buster

# Define the model name as a build argument
ARG MODEL_NAME=WhereIsAI/UAE-Large-V1

# Set the model name and path as environment variables
ENV MODEL_NAME=$MODEL_NAME
ENV MODEL_PATH=/models/${MODEL_NAME}

# Install necessary Python packages
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Install Git and Git LFS in one layer and clean the apt cache to keep
# the image small.
# FIX: the original `apt-get install git-lfs` had no -y flag, which
# aborts a non-interactive docker build at the confirmation prompt.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git curl && \
    curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
    apt-get install -y --no-install-recommends git-lfs && \
    rm -rf /var/lib/apt/lists/* && \
    git lfs install

# Clone the model repository and download the large files
RUN git clone https://huggingface.co/${MODEL_NAME} /models/${MODEL_NAME} && \
    cd /models/${MODEL_NAME} && git lfs pull

# Remove the onnx directory — and the .git directory, which holds a full
# second copy of the LFS objects — to reduce image size
RUN rm -rf /models/${MODEL_NAME}/onnx /models/${MODEL_NAME}/.git

# Copy your FastAPI app and the start script into the container
COPY ./app /app
COPY start.sh /start.sh
# Ensure the entrypoint script is executable regardless of the
# permissions it had in the build context
RUN chmod +x /start.sh

# Set the working directory
WORKDIR /app

# Expose the FastAPI port
EXPOSE 8080

# Start the FastAPI server using the start script
CMD ["/start.sh"]
app/__init__.py ADDED
File without changes
app/app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI
3
+ from typing import Union, List, Dict, Tuple, Optional
4
+ from pydantic import BaseModel, Field
5
+ from angle_emb import AnglE
6
+
7
class EmbeddingInput(BaseModel):
    """OpenAI-compatible request body for the POST /v1/embeddings route.

    Only ``input`` (and optionally ``dimensions``) is consumed by this
    service; the remaining fields exist for wire-compatibility with
    OpenAI client libraries.
    """

    # Text(s) to embed.  FIX: Tuple[str, ...] instead of Tuple[str] —
    # Tuple[str] is the type of a 1-element tuple, so any longer tuple
    # of strings would fail pydantic validation.
    input: Union[List[str], Tuple[str, ...], List[Dict], str] = Field(..., description="The input to be encoded")
    # Accepted for compatibility; not read by get_embeddings.
    model: Optional[str] = None
    # Accepted for compatibility; responses always use float lists.
    encoding_format: Optional[str] = 'float'
    # Optional output embedding size, forwarded to AnglE.encode.
    dimensions: Optional[int] = None
    # Accepted for compatibility; not read by get_embeddings.
    user: Optional[str] = None
13
+
14
app = FastAPI()

# Get the model name and path from the environment variables
# (the Dockerfile sets both; the defaults mirror its ARG value).
model_name = os.getenv('MODEL_NAME', default='WhereIsAI/UAE-Large-V1')
model_path = os.getenv('MODEL_PATH', default='models/WhereIsAI/UAE-Large-V1')

# Load the model once at import time.
# FIX: pre-bind angle_model so the name always exists.  The original
# except branch only printed the error, leaving `angle_model` undefined
# and turning every later /v1/embeddings call into a NameError; keeping
# the service up (for / and /health) but with angle_model = None makes
# the failure mode explicit.
angle_model = None
try:
    angle_model = AnglE.from_pretrained(model_path, pooling_strategy='cls').to('cpu')
except Exception as e:
    print(f"Failed to load model from path {model_path}. Error: {str(e)}")
25
+
26
+ @app.get("/")
27
+ def read_root():
28
+ return {
29
+ "model_name": model_name,
30
+ "model_path": model_path,
31
+ "message": "Model is up and running",
32
+ "route_info": {
33
+ "/": "Returns the model info",
34
+ "/health": "Returns the health status of the application",
35
+ "/v1/embeddings": 'POST route to get embeddings. Usage: curl -H "Content-Type: application/json" -d \'{ "input": "Your text string goes here" }\' http://localhost:8080/v1/embeddings'
36
+ }
37
+ }
38
+
39
+ @app.get("/health")
40
+ def health_check():
41
+ return {"health": "ok"}
42
+
43
+ @app.post("/v1/embeddings")
44
+ def get_embeddings(embedding_input: EmbeddingInput):
45
+ # # Check if the input is an empty string
46
+ # if not embedding_input.input.strip():
47
+ # return {
48
+ # "object": "list",
49
+ # "data": [],
50
+ # "model": model_name,
51
+ # "usage": {"prompt_tokens": 0, "total_tokens": 0},
52
+ # }
53
+
54
+ # Encode the input text using the model
55
+ embeddings = angle_model.encode(embedding_input.input, embedding_size=embedding_input.dimensions)
56
+
57
+ # Create a response format compatible with OpenAI's API
58
+ response = {
59
+ "object": "list",
60
+ "data": [
61
+ {"object": "embedding", "index": i, "embedding": emb.tolist()}
62
+ for i, emb in enumerate(embeddings)
63
+ ],
64
+ "model": model_name,
65
+ "usage": {"prompt_tokens": len(embedding_input.input), "total_tokens": len(embedding_input.input)},
66
+ }
67
+
68
+ return response
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ gunicorn
4
+ pydantic
5
+ angle_emb
6
+ bitsandbytes
7
+ datasets
8
+ transformers
9
+ wandb
start.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Container entrypoint: launch the FastAPI app with a single uvicorn
# worker on port 8080 (the port exposed by the Dockerfile).
#
# `exec` replaces this shell with uvicorn so it becomes PID 1 and
# receives container stop signals (SIGTERM) directly.
exec uvicorn app.app:app --host 0.0.0.0 --port 8080