Files changed:
- Dockerfile +35 -22
- Dockerfile.backup +0 -61
- app.py +0 -91
- main.py +6 -1
Dockerfile
CHANGED
@@ -1,33 +1,46 @@
-…
+# Use an NVIDIA CUDA base image
+ARG CUDA_IMAGE="12.1.1-cudnn8-devel-ubuntu22.04"
 FROM nvidia/cuda:${CUDA_IMAGE}
 
+
 ENV HOST 0.0.0.0
 
-…
-#
-ENV LLAMA_CUBLAS=1
-# Install …
-… pydantic-settings starlette-context gradio huggingface_hub hf_transfer
-#
-…
+# Set the working directory in the container to /app
+#WORKDIR /app
+
+RUN mkdir -p /app/cache && chmod -R 777 /app/cache
+
+ENV HF_HOME=/app/cache
+
+# Install Python and pip
+RUN apt-get update && apt-get install --no-install-recommends -y \
+    build-essential \
+    python3.9 \
+    python3-pip \
+    git \
+    ffmpeg \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install required packages from requirements.txt
+COPY ./requirements.txt /app/requirements.txt
+RUN pip3 install --no-cache-dir -r /app/requirements.txt
+
+# Expose the ports for FastAPI and Streamlit
+EXPOSE 8000
+EXPOSE 8501
+
+# Copy and give execute permissions to the start script
+COPY start_server.sh /app/start_server.sh
+RUN chmod +x /app/start_server.sh
 
 RUN useradd -m -u 1000 user
 # Switch to the "user" user
 USER user
+
+WORKDIR /home/user/app
 # Set home to the user's home directory
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
@@ -39,9 +52,9 @@ ENV HOME=/home/user \
     GRADIO_THEME=huggingface \
     SYSTEM=spaces
 
-WORKDIR $HOME/app
-
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
-COPY --chown=user .
+COPY --chown=user . /home/user/app
 
+# Run the start script
+#CMD ["/app/start_server.sh"]
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
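The new image routes Hugging Face downloads into the world-writable /app/cache via HF_HOME. A minimal sketch of how to check that from inside the container, assuming huggingface_hub is installed via requirements.txt (not shown in this commit); the repo and file names are reused from the deleted app.py:

# Sketch: confirm HF_HOME redirects the huggingface_hub cache to /app/cache.
import os
os.environ.setdefault("HF_HOME", "/app/cache")  # must be set before the import below takes effect

from huggingface_hub import hf_hub_download  # assumed installed via requirements.txt

path = hf_hub_download(
    repo_id="TheBloke/CodeLlama-7B-GGUF",  # model used by the deleted app.py
    filename="codellama-7b.Q8_0.gguf",
)
print(path)  # should resolve under /app/cache when HF_HOME is honored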
Dockerfile.backup
DELETED
@@ -1,61 +0,0 @@
-# Use an NVIDIA CUDA base image
-ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
-FROM nvidia/cuda:${CUDA_IMAGE}
-
-
-ENV HOST 0.0.0.0
-
-# Set the working directory in the container to /app
-#WORKDIR /app
-
-RUN mkdir -p /app/cache && chmod -R 777 /app/cache
-
-ENV HF_HOME=/app/cache
-
-# Install Python and pip
-RUN apt-get update && apt-get upgrade -y \
-    && apt-get install -y git build-essential \
-    python3 python3-pip gcc wget \
-    ocl-icd-opencl-dev opencl-headers clinfo \
-    libclblast-dev libopenblas-dev \
-    && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
-
-ENV CUDA_DOCKER_ARCH=all
-ENV LLAMA_CUBLAS=1
-
-# Copy the current directory contents into the container at /app
-COPY . /app
-
-# Install required packages from requirements.txt
-COPY ./requirements.txt /app/requirements.txt
-RUN pip3 install --no-cache-dir -r /app/requirements.txt
-
-# Expose the ports for FastAPI and Streamlit
-EXPOSE 8000
-EXPOSE 8501
-
-# Copy and give execute permissions to the start script
-COPY start_server.sh /app/start_server.sh
-RUN chmod +x /app/start_server.sh
-
-RUN useradd -m -u 1000 user
-# Switch to the "user" user
-USER user
-WORKDIR /home/user/app
-# Set home to the user's home directory
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    PYTHONPATH=$HOME/app \
-    PYTHONUNBUFFERED=1 \
-    GRADIO_ALLOW_FLAGGING=never \
-    GRADIO_NUM_PORTS=1 \
-    GRADIO_SERVER_NAME=0.0.0.0 \
-    GRADIO_THEME=huggingface \
-    SYSTEM=spaces
-
-# Copy the current directory contents into the container at $HOME/app setting the owner to the user
-COPY --chown=user . /home/user/app
-
-# Run the start script
-#CMD ["/app/start_server.sh"]
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
app.py
DELETED
@@ -1,91 +0,0 @@
-import os
-#import gradio as gr
-import copy
-import time
-import llama_cpp
-from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-from fastapi import FastAPI, Request
-from fastapi.middleware.cors import CORSMiddleware
-import nest_asyncio
-import uvicorn
-
-app = FastAPI()
-MODEL_NAME = "TheBloke/CodeLlama-7B-GGUF"
-PDF_PATH = "/opt/docs"
-CLASSIFIER_MODEL_NAME = "roberta-large-mnli"
-
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=['*'],
-    allow_credentials=True,
-    allow_methods=['*'],
-    allow_headers=['*'],
-)
-
-llm = Llama(
-    model_path=hf_hub_download(
-        repo_id=os.environ.get("REPO_ID", MODEL_NAME),
-        filename=os.environ.get("MODEL_FILE", "codellama-7b.Q8_0.gguf"),
-    ),
-    n_ctx=2048,
-    n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
-)
-
-history = []
-
-system_message = """
-You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
-If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
-"""
-
-@app.post("/predict")
-def generate_text(message, history):
-    temp = ""
-    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
-    for interaction in history:
-        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
-
-    input_prompt = input_prompt + str(message) + " [/INST] "
-
-    output = llm(
-        input_prompt,
-        temperature=0.15,
-        top_p=0.1,
-        top_k=40,
-        repeat_penalty=1.1,
-        max_tokens=1024,
-        stop=[
-            "<|prompter|>",
-            "<|endoftext|>",
-            "<|endoftext|> \n",
-            "ASSISTANT:",
-            "USER:",
-            "SYSTEM:",
-        ],
-        stream=True,
-    )
-    for out in output:
-        stream = copy.deepcopy(out)
-        temp += stream["choices"][0]["text"]
-        yield temp
-
-    history = ["init", input_prompt]
-
-
-# demo = gr.ChatInterface(
-#     generate_text,
-#     title="llama-cpp-python on GPU",
-#     description="Running LLM with https://github.com/abetlen/llama-cpp-python",
-#     examples=["tell me everything about llamas"],
-#     cache_examples=True,
-#     retry_btn=None,
-#     undo_btn="Delete Previous",
-#     clear_btn="Clear",
-# )
-# demo.queue(concurrency_count=1, max_size=5)
-# demo.launch()
-
-nest_asyncio.apply()
-uvicorn.run(app, port=8000)
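For reference, a hypothetical client for the deleted /predict endpoint. FastAPI only streams a plain generator like generate_text when it is wrapped in a StreamingResponse, so this sketch assumes that wrapping was in place; the URL and JSON payload shape are illustrative assumptions, not part of the commit:

# Hypothetical client sketch for the deleted streaming /predict endpoint.
import requests

with requests.post(
    "http://localhost:8000/predict",  # assumed local deployment
    json={"message": "tell me everything about llamas",  # example prompt from the commented-out Gradio demo
          "history": []},
    stream=True,
) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)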
main.py
CHANGED
@@ -14,7 +14,7 @@ from langchain.prompts import PromptTemplate
 from langchain.chains import RetrievalQA
 from langchain.schema.runnable import RunnableBranch
 from langchain_core.runnables import RunnableLambda
-
+import torch
 
 # Logger configuration
 logging.basicConfig(level=logging.INFO,
@@ -22,6 +22,11 @@ logging.basicConfig(level=logging.INFO,
     datefmt='%Y-%m-%d %H:%M:%S')
 logger = logging.getLogger(__name__)
 
+
+import os
+os.system("nvidia-smi")
+print("TORCH_CUDA", torch.cuda.is_available())
+
 # Add path to sys
 # sys.path.insert(0,'/opt/accelerate')
 # sys.path.insert(0,'/opt/uvicorn')
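The diagnostic added above shells out to nvidia-smi via os.system, which fails silently if the binary is missing. A slightly more defensive variant (a sketch, not part of the commit):

# Sketch: CUDA diagnostics equivalent to the added lines, with error handling.
import subprocess
import torch

try:
    smi = subprocess.run(["nvidia-smi"], capture_output=True, text=True, check=True)
    print(smi.stdout)
except (FileNotFoundError, subprocess.CalledProcessError) as exc:
    print("nvidia-smi unavailable:", exc)

print("TORCH_CUDA", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))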