fastelectronicvegetable committed on
Commit
69242cf
·
1 Parent(s): 40b1009
Files changed (6) hide show
  1. Dockerfile +20 -0
  2. README.md +4 -6
  3. justfile +8 -0
  4. main.py +47 -0
  5. requirements.txt +7 -0
  6. shell.nix +31 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ RUN useradd -m -u 1000 user
10
+
11
+ USER user
12
+
13
+ ENV HOME=/home/user \
14
+ PATH=/home/user/.local/bin:$PATH
15
+
16
+ WORKDIR $HOME/app
17
+
18
+ COPY --chown=user . $HOME/app
19
+
20
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- title: Sentence Transformers Test
3
- emoji: 🚀
4
- colorFrom: blue
5
- colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Fastapi Dummy
3
+ emoji: 🐢
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  ---
 
 
justfile ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ set fallback
2
+
3
+ run:
4
+
5
# Start the auto-reloading development server.
# just executes every recipe line in a separate shell, so activating the
# virtualenv and launching uvicorn must happen inside one command; here that
# command runs inside the environment described by shell.nix.
# main.py sits at the repository root, hence `main:app` (the same target the
# Dockerfile's CMD uses).
dev:
    nix-shell --run ". venv/bin/activate && uvicorn main:app --reload"
main.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Generic, List, Optional, TypeVar
2
+ from functools import partial
3
+ from pydantic import BaseModel, ValidationError, validator
4
+ from pydantic.generics import GenericModel
5
+ from sentence_transformers import SentenceTransformer
6
+ from fastapi import FastAPI
7
+ import os, asyncio, numpy, ujson
8
+
9
# Sentence-embedding model, constructed once when this module is imported and
# shared by every call to _encode.
MODEL = SentenceTransformer("all-mpnet-base-v2")
10
+
11
def cache(func):
    """Memoize a batch function on a per-sentence basis.

    ``func`` must accept a list of sentences and return one result per
    sentence, in the same order. The returned wrapper forwards only the
    sentences it has not seen before to ``func`` and serves the rest from an
    in-memory dict.

    Note: the dict lives as long as the wrapper and entries are never
    evicted, so memory grows with the number of distinct sentences seen.

    Args:
        func: Callable taking ``List[str]`` and returning a sequence of the
            same length.

    Returns:
        A wrapper taking ``List[str]`` and returning a list with one cached
        result per input sentence, in input order.
    """
    # Local import: the module's top-level imports only pull in ``partial``.
    from functools import wraps

    inner_cache = {}

    @wraps(func)
    def inner(sentences: List[str]):
        if not sentences:
            return []
        # dict.fromkeys de-duplicates while keeping first-seen order, so a
        # sentence repeated within one call is handed to ``func`` only once.
        not_in_cache = [
            s for s in dict.fromkeys(sentences) if s not in inner_cache
        ]
        if not_in_cache:
            # Pass a copy so ``func`` cannot disturb the pairing below.
            processed_sentences = func(list(not_in_cache))
            inner_cache.update(zip(not_in_cache, processed_sentences))
        return [inner_cache[s] for s in sentences]

    return inner
23
+
24
@cache
def _encode(sentences: List[str]):
    """Embed ``sentences`` with MODEL and return one rounded array per sentence.

    Runs synchronously; results are memoized per sentence by the ``cache``
    decorator.

    Args:
        sentences: Sentences to embed.

    Returns:
        A list of numpy arrays, one per sentence, rounded to 3 decimals.
    """
    embeddings = MODEL.encode(
        sentences,
        normalize_embeddings=True,
        convert_to_tensor=True,
        batch_size=4,
        show_progress_bar=True,
    )
    # Tensor.numpy() only works on CPU tensors; .cpu() is a no-op when the
    # tensor already lives there, and avoids a crash when the model runs on
    # an accelerator.
    return [numpy.around(row.cpu().numpy(), 3) for row in embeddings]
28
+
29
async def encode(sentences: List[str]) -> List[numpy.ndarray]:
    """Embed ``sentences`` without blocking the event loop.

    Hands the synchronous ``_encode`` call to the running loop's default
    executor and awaits its result.

    Args:
        sentences: Sentences to embed.

    Returns:
        Whatever ``_encode`` returns for ``sentences`` (one array per
        sentence).
    """
    # get_running_loop() is the supported way to obtain the loop from inside
    # a coroutine; it never creates a new loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _encode, sentences)
33
+
34
# Request body pairing a single query string with a list of candidate strings.
# NOTE(review): no endpoint visible in this file accepts this model yet.
class SemanticSearchReq(BaseModel):
    # Text to search with.
    query: str
    # Texts to compare against the query.
    candidates: List[str]
37
+
38
# Request body for POST /embed.
class EmbedReq(BaseModel):
    # Sentences to turn into embeddings; one embedding is returned per entry.
    sentences: List[str]
40
+
41
# ASGI application object that the route decorators below register on.
app = FastAPI()
42
+
43
@app.post("/embed")
async def embed(embed: EmbedReq):
    """Return embeddings for the sentences in the request body.

    The embeddings are computed via ``encode`` and returned as a
    ujson-serialized string holding a list of float lists, one per sentence.
    """
    vectors = await encode(embed.sentences)
    # numpy arrays are not JSON-serializable, so turn each into a plain list
    # of floats first.
    as_lists = [vector.tolist() for vector in vectors]
    # NOTE(review): returning an already-serialized string presumably makes
    # FastAPI encode it a second time (clients would receive a JSON string
    # containing JSON) - confirm this is intended before changing it.
    return ujson.dumps(as_lists)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ sentence-transformers==2.2
2
+ numpy==1.24
3
+ joblib==1.2
4
+ fastapi==0.89
5
+ uvicorn[standard]==0.20
6
+ huggingface-hub==0.10.1
7
+ ujson==5.7
shell.nix ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ with import <nixpkgs> {};
2
+ mkShell {
3
+ packages = [
4
+ rustup
5
+ cargo
6
+
7
+ openssl
8
+ cacert
9
+ libiconv
10
+ pkg-config
11
+ libunistring
12
+ openblas
13
+ gfortran
14
+ gcc
15
+ sentencepiece
16
+ python39Packages.sentencepiece
17
+
18
+ wget
19
+
20
+ xz
21
+ gzip
22
+ bzip2
23
+ curlFull
24
+ ];
25
+ buildInputs = [] ++ lib.optionals stdenv.isDarwin [
26
+ darwin.apple_sdk.frameworks.IOKit
27
+ darwin.apple_sdk.frameworks.Security
28
+ darwin.apple_sdk.frameworks.CoreServices
29
+ darwin.apple_sdk.frameworks.CoreFoundation
30
+ ] ;
31
+ }