itod committed on
Commit
72b0049
·
1 Parent(s): 2de7320

Initial commit

Browse files
Files changed (6) hide show
  1. .gitignore +3 -0
  2. Dockerfile +39 -0
  3. app/__init__.py +0 -0
  4. app/app.py +68 -0
  5. requirements.txt +9 -0
  6. start.sh +13 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv/
2
+ models/
3
+ app/__pycache__/
Dockerfile ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.8-slim-buster

# Define the model name as a build argument
ARG MODEL_NAME=WhereIsAI/UAE-Large-V1

# Set the model name and path as environment variables
ENV MODEL_NAME=$MODEL_NAME
ENV MODEL_PATH=/models/${MODEL_NAME}

# Install necessary Python packages
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Install Git and Git LFS in one layer and clean the apt cache to keep
# the image small.
# FIX: the original `apt-get install git-lfs` had no -y flag, which
# aborts a non-interactive docker build at the confirmation prompt.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git curl && \
    curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
    apt-get install -y --no-install-recommends git-lfs && \
    rm -rf /var/lib/apt/lists/* && \
    git lfs install

# Clone the model repository and download the large files
RUN git clone https://huggingface.co/${MODEL_NAME} /models/${MODEL_NAME} && \
    cd /models/${MODEL_NAME} && git lfs pull

# Remove the onnx directory — and the .git directory, which holds a full
# second copy of the LFS objects — to reduce image size
RUN rm -rf /models/${MODEL_NAME}/onnx /models/${MODEL_NAME}/.git

# Copy your FastAPI app and the start script into the container
COPY ./app /app
COPY start.sh /start.sh
# Ensure the entrypoint script is executable regardless of the
# permissions it had in the build context
RUN chmod +x /start.sh

# Set the working directory
WORKDIR /app

# Expose the FastAPI port
EXPOSE 8080

# Start the FastAPI server using the start script
CMD ["/start.sh"]
app/__init__.py ADDED
File without changes
app/app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI
3
+ from typing import Union, List, Dict, Tuple, Optional
4
+ from pydantic import BaseModel, Field
5
+ from angle_emb import AnglE
6
+
7
class EmbeddingInput(BaseModel):
    """OpenAI-compatible request body for the POST /v1/embeddings route.

    Only ``input`` (and optionally ``dimensions``) is consumed by this
    service; the remaining fields exist for wire-compatibility with
    OpenAI client libraries.
    """

    # Text(s) to embed.  FIX: Tuple[str, ...] instead of Tuple[str] —
    # Tuple[str] is the type of a 1-element tuple, so any longer tuple
    # of strings would fail pydantic validation.
    input: Union[List[str], Tuple[str, ...], List[Dict], str] = Field(..., description="The input to be encoded")
    # Accepted for compatibility; not read by get_embeddings.
    model: Optional[str] = None
    # Accepted for compatibility; responses always use float lists.
    encoding_format: Optional[str] = 'float'
    # Optional output embedding size, forwarded to AnglE.encode.
    dimensions: Optional[int] = None
    # Accepted for compatibility; not read by get_embeddings.
    user: Optional[str] = None
13
+
14
app = FastAPI()

# Get the model name and path from the environment variables
# (the Dockerfile sets both; the defaults mirror its ARG value).
model_name = os.getenv('MODEL_NAME', default='WhereIsAI/UAE-Large-V1')
model_path = os.getenv('MODEL_PATH', default='models/WhereIsAI/UAE-Large-V1')

# Load the model once at import time.
# FIX: pre-bind angle_model so the name always exists.  The original
# except branch only printed the error, leaving `angle_model` undefined
# and turning every later /v1/embeddings call into a NameError; keeping
# the service up (for / and /health) but with angle_model = None makes
# the failure mode explicit.
angle_model = None
try:
    angle_model = AnglE.from_pretrained(model_path, pooling_strategy='cls').to('cpu')
except Exception as e:
    print(f"Failed to load model from path {model_path}. Error: {str(e)}")
25
+
26
+ @app.get("/")
27
+ def read_root():
28
+ return {
29
+ "model_name": model_name,
30
+ "model_path": model_path,
31
+ "message": "Model is up and running",
32
+ "route_info": {
33
+ "/": "Returns the model info",
34
+ "/health": "Returns the health status of the application",
35
+ "/v1/embeddings": 'POST route to get embeddings. Usage: curl -H "Content-Type: application/json" -d \'{ "input": "Your text string goes here" }\' http://localhost:8080/v1/embeddings'
36
+ }
37
+ }
38
+
39
+ @app.get("/health")
40
+ def health_check():
41
+ return {"health": "ok"}
42
+
43
+ @app.post("/v1/embeddings")
44
+ def get_embeddings(embedding_input: EmbeddingInput):
45
+ # # Check if the input is an empty string
46
+ # if not embedding_input.input.strip():
47
+ # return {
48
+ # "object": "list",
49
+ # "data": [],
50
+ # "model": model_name,
51
+ # "usage": {"prompt_tokens": 0, "total_tokens": 0},
52
+ # }
53
+
54
+ # Encode the input text using the model
55
+ embeddings = angle_model.encode(embedding_input.input, embedding_size=embedding_input.dimensions)
56
+
57
+ # Create a response format compatible with OpenAI's API
58
+ response = {
59
+ "object": "list",
60
+ "data": [
61
+ {"object": "embedding", "index": i, "embedding": emb.tolist()}
62
+ for i, emb in enumerate(embeddings)
63
+ ],
64
+ "model": model_name,
65
+ "usage": {"prompt_tokens": len(embedding_input.input), "total_tokens": len(embedding_input.input)},
66
+ }
67
+
68
+ return response
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ gunicorn
4
+ pydantic
5
+ angle_emb
6
+ bitsandbytes
7
+ datasets
8
+ transformers
9
+ wandb
start.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Container entrypoint: launch the FastAPI app with a single uvicorn
# worker on port 8080 (the port exposed by the Dockerfile).
#
# `exec` replaces this shell with uvicorn so it becomes PID 1 and
# receives container stop signals (SIGTERM) directly.
exec uvicorn app.app:app --host 0.0.0.0 --port 8080