matthoffner committed on
Commit
d7d0f08
·
0 Parent(s):

Duplicate from matthoffner/ggml-ctransformers-fastapi

Browse files
Files changed (5) hide show
  1. .gitattributes +34 -0
  2. Dockerfile +23 -0
  3. README.md +19 -0
  4. main.py +45 -0
  5. requirements.txt +11 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): `latest` is a moving tag, so rebuilds may pick up a different
# Python release. Consider pinning a version once the range supported by the
# packages in requirements.txt has been confirmed.
FROM python:latest

# Key=value form; the space-separated `ENV key value` spelling is legacy syntax.
ENV PYTHONUNBUFFERED=1

# Must match the port passed to uvicorn in CMD below.
EXPOSE 8000

# Run everything from here on as an unprivileged user with uid 1000.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy the requirements file on its own first so the dependency layer is only
# rebuilt when requirements.txt changes, not on every source edit.
COPY --chown=user requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

COPY --chown=user . $HOME/app

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ggml-ctransformers-fastapi
3
+ emoji: 🐳🤗⚡️
4
+ sdk: docker
5
+ app_port: 8000
6
+ duplicated_from: matthoffner/ggml-ctransformers-fastapi
7
+ ---
8
+
9
+ # ggml-ctransformers-fastapi
10
+
11
+ ## <a href="https://github.com/ggerganov/ggml" target="_blank">ggml</a>
12
+ ## <a href="https://github.com/marella/ctransformers" target="_blank">ctransformers</a>
13
+ ## [FastAPI Docs](https://matthoffner-ggml-ctransformers-fastapi.hf.space/docs)
14
+
15
+ ### Updates
16
+
17
+ * Added /v1/chat/completions
18
+ * [Start using ctransformers](https://github.com/marella/ctransformers)
19
+ * [Added starcoder example](https://github.com/ggerganov/ggml/tree/master/examples/starcoder)
main.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fastapi
2
+ import json
3
+ import markdown
4
+ import uvicorn
5
+ from fastapi.responses import HTMLResponse
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from sse_starlette.sse import EventSourceResponse
8
+ from ctransformers import AutoModelForCausalLM
9
+ from pydantic import BaseModel
10
+
11
# Identifiers handed to AutoModelForCausalLM.from_pretrained below.
_MODEL_REPO = "TheBloke/starcoder-beta-GGML"
_MODEL_FILE = "starcoder-beta.ggmlv3.q4_0.bin"
_MODEL_TYPE = "starcoder"

# The model object is created once at import time and shared by all requests.
llm = AutoModelForCausalLM.from_pretrained(
    _MODEL_REPO,
    model_file=_MODEL_FILE,
    model_type=_MODEL_TYPE,
)

app = fastapi.FastAPI()

# NOTE(review): all origins, methods and headers are allowed while credentials
# are also enabled. Confirm this wide-open policy is intended; if browser
# credentials are not needed, allow_credentials could be turned off.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
22
+
23
@app.get("/")
async def index():
    """Serve README.md rendered to HTML as the landing page.

    A leading front-matter block (the lines enclosed by ``---`` delimiters at
    the very top of the README) is removed before rendering so that its
    metadata keys are not shown as page content. A README without such a
    block, or with an unterminated one, is rendered unchanged.

    Returns:
        HTMLResponse: the rendered README with status code 200.
    """
    with open("README.md", "r", encoding="utf-8") as readme_file:
        md_template_string = readme_file.read()

    lines = md_template_string.splitlines(keepends=True)
    if lines and lines[0].strip() == "---":
        # Look for the closing delimiter; only strip when the block is terminated,
        # otherwise a stray leading '---' would swallow the whole document.
        for closing_index, line in enumerate(lines[1:], start=1):
            if line.strip() == "---":
                md_template_string = "".join(lines[closing_index + 1:])
                break

    html_content = markdown.markdown(md_template_string)
    return HTMLResponse(content=html_content, status_code=200)
29
+
30
+ class ChatCompletionRequest(BaseModel):  # Body model accepted by the POST /v1/chat/completions handler.
31
+ prompt: str  # Prompt text; the handler passes it to llm.tokenize().
32
+
33
# Streams a completion for the submitted prompt as server-sent events:
# one event per generated token, followed by a final "[DONE]" event.
@app.post("/v1/chat/completions")
async def chat(request: ChatCompletionRequest, response_mode=None):
    # `response_mode` is accepted but not read anywhere in this handler.
    prompt_tokens = llm.tokenize(request.prompt)

    async def _token_events(token_ids, model):
        """Yield a ``{"data": ...}`` payload per generated token, then "[DONE]"."""
        # NOTE(review): model.generate() is iterated synchronously inside this
        # async generator; if generation is slow it may stall the event loop
        # between yields. Confirm and consider offloading.
        for generated in model.generate(token_ids):
            yield {"data": model.detokenize(generated)}
        yield {"data": "[DONE]"}

    event_stream = _token_events(prompt_tokens, llm)
    return EventSourceResponse(event_stream)
42
+
43
+ if __name__ == "__main__":  # Only start a server when this file is executed directly as a script.
44
+ uvicorn.run(app, host="0.0.0.0", port=8000)  # Same host and port as the uvicorn command in the Dockerfile CMD.
45
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ uvicorn
2
+ markdown
3
+ fastapi
4
+ loguru
5
+ torch
6
+ numpy
7
+ transformers
8
+ ctransformers
9
+ accelerate
10
+ langchain
11
+ sse_starlette