Petro
committed on
Commit
•
273463a
1
Parent(s):
b245107
- Dockerfile +1 -1
- main.py +0 -12
Dockerfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
-
|
4 |
|
5 |
COPY requirements.txt ./requirements.txt
|
6 |
|
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
+
# Download the quantized Zephyr model at build time. The output filename must be passed via -O;
# as a bare positional argument wget would treat it as a second URL to fetch.
RUN wget -q -O zephyr-7b-beta.Q4_K_S.gguf https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_S.gguf
|
4 |
|
5 |
COPY requirements.txt ./requirements.txt
|
6 |
|
main.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
import os
|
2 |
-
|
3 |
from ctransformers import AutoModelForCausalLM
|
4 |
from fastapi import FastAPI
|
5 |
from pydantic import BaseModel
|
@@ -17,16 +15,6 @@ class validation(BaseModel):
|
|
17 |
#Fast API
|
18 |
|
19 |
app = FastAPI()
|
20 |
-
file_name = "zephyr-7b-beta.Q4_K_S.gguf"
|
21 |
-
|
22 |
-
if not os.path.exists(file_name):
|
23 |
-
print("Downloading model...")
|
24 |
-
url = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_S.gguf"
|
25 |
-
response = requests.get(url)
|
26 |
-
print(response.status_code)
|
27 |
-
with open(file_name, 'wb') as file:
|
28 |
-
file.write(response.content)
|
29 |
-
|
30 |
|
31 |
@app.post("/llm_on_cpu")
|
32 |
async def stream(item: validation):
|
|
|
|
|
|
|
1 |
from ctransformers import AutoModelForCausalLM
|
2 |
from fastapi import FastAPI
|
3 |
from pydantic import BaseModel
|
|
|
15 |
#Fast API
|
16 |
|
17 |
app = FastAPI()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
@app.post("/llm_on_cpu")
|
20 |
async def stream(item: validation):
|