Shawn732 committed
Commit 6d69370 · 1 Parent(s): 7e9c0cb
Files changed (4)
  1. Dockerfile +35 -22
  2. Dockerfile.backup +0 -61
  3. app.py +0 -91
  4. main.py +6 -1
Dockerfile CHANGED
@@ -1,33 +1,46 @@
- ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
+ # Use an NVIDIA CUDA base image
+ ARG CUDA_IMAGE="12.1.1-cudnn8-devel-ubuntu22.04"
  FROM nvidia/cuda:${CUDA_IMAGE}

- # We need to set the host to 0.0.0.0 to allow outside access
+
  ENV HOST 0.0.0.0

- RUN apt-get update && apt-get upgrade -y \
-     && apt-get install -y git build-essential \
-     python3 python3-pip gcc wget \
-     ocl-icd-opencl-dev opencl-headers clinfo \
-     libclblast-dev libopenblas-dev \
-     && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
+ # Set the working directory in the container to /app
+ #WORKDIR /app
+
+ RUN mkdir -p /app/cache && chmod -R 777 /app/cache
+
+ ENV HF_HOME=/app/cache

- COPY . .
+ # Install Python and pip
+ RUN apt-get update && apt-get install --no-install-recommends -y \
+     build-essential \
+     python3.9 \
+     python3-pip \
+     git \
+     ffmpeg \
+     && apt-get clean && rm -rf /var/lib/apt/lists/*

- # setting build related env vars
- ENV CUDA_DOCKER_ARCH=all
- ENV LLAMA_CUBLAS=1
+ # Copy the current directory contents into the container at /app
+ COPY . /app

- # Install depencencies
- RUN python3 -m pip install --upgrade pip pytest cmake \
-     scikit-build setuptools fastapi uvicorn sse-starlette \
-     pydantic-settings starlette-context gradio huggingface_hub hf_transfer
+ # Install required packages from requirements.txt
+ COPY ./requirements.txt /app/requirements.txt
+ RUN pip3 install --no-cache-dir -r /app/requirements.txt

- # Install llama-cpp-python (build with cuda)
- RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+ # Expose the ports for FastAPI and Streamlit
+ EXPOSE 8000
+ EXPOSE 8501
+
+ # Copy and give execute permissions to the start script
+ COPY start_server.sh /app/start_server.sh
+ RUN chmod +x /app/start_server.sh

  RUN useradd -m -u 1000 user
  # Switch to the "user" user
  USER user
+
+ WORKDIR /home/user/app
  # Set home to the user's home directory
  ENV HOME=/home/user \
      PATH=/home/user/.local/bin:$PATH \
@@ -39,9 +52,9 @@ ENV HOME=/home/user \
      GRADIO_THEME=huggingface \
      SYSTEM=spaces

- WORKDIR $HOME/app
-
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
- COPY --chown=user . $HOME/app
+ COPY --chown=user . /home/user/app

- CMD ["python3", "app.py"]
+ # Run the start script
+ #CMD ["/app/start_server.sh"]
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
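For reference, a minimal smoke-test sketch (not part of this commit) for an image built from the Dockerfile above. It assumes the container was built and started with port 8000 published (e.g. docker build -t space . && docker run --gpus all -p 8000:8000 space) and probes FastAPI's auto-generated OpenAPI schema, since main.py's own routes are not visible in this diff.

# Smoke-test sketch for the container defined above (assumptions: image built
# locally and run with -p 8000:8000; /openapi.json is FastAPI's built-in schema
# endpoint, not an application route from main.py).
import json
import urllib.request

BASE_URL = "http://localhost:8000"  # matches EXPOSE 8000 and the uvicorn CMD

with urllib.request.urlopen(f"{BASE_URL}/openapi.json", timeout=10) as resp:
    schema = json.loads(resp.read().decode("utf-8"))

print("Service title:", schema.get("info", {}).get("title"))
print("Available paths:", sorted(schema.get("paths", {})))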
Dockerfile.backup DELETED
@@ -1,61 +0,0 @@
- # Use an NVIDIA CUDA base image
- ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
- FROM nvidia/cuda:${CUDA_IMAGE}
-
-
- ENV HOST 0.0.0.0
-
- # Set the working directory in the container to /app
- #WORKDIR /app
-
- RUN mkdir -p /app/cache && chmod -R 777 /app/cache
-
- ENV HF_HOME=/app/cache
-
- # Install Python and pip
- RUN apt-get update && apt-get upgrade -y \
-     && apt-get install -y git build-essential \
-     python3 python3-pip gcc wget \
-     ocl-icd-opencl-dev opencl-headers clinfo \
-     libclblast-dev libopenblas-dev \
-     && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
-
- ENV CUDA_DOCKER_ARCH=all
- ENV LLAMA_CUBLAS=1
-
- # Copy the current directory contents into the container at /app
- COPY . /app
-
- # Install required packages from requirements.txt
- COPY ./requirements.txt /app/requirements.txt
- RUN pip3 install --no-cache-dir -r /app/requirements.txt
-
- # Expose the ports for FastAPI and Streamlit
- EXPOSE 8000
- EXPOSE 8501
-
- # Copy and give execute permissions to the start script
- COPY start_server.sh /app/start_server.sh
- RUN chmod +x /app/start_server.sh
-
- RUN useradd -m -u 1000 user
- # Switch to the "user" user
- USER user
- WORKDIR /home/user/app
- # Set home to the user's home directory
- ENV HOME=/home/user \
-     PATH=/home/user/.local/bin:$PATH \
-     PYTHONPATH=$HOME/app \
-     PYTHONUNBUFFERED=1 \
-     GRADIO_ALLOW_FLAGGING=never \
-     GRADIO_NUM_PORTS=1 \
-     GRADIO_SERVER_NAME=0.0.0.0 \
-     GRADIO_THEME=huggingface \
-     SYSTEM=spaces
-
- # Copy the current directory contents into the container at $HOME/app setting the owner to the user
- COPY --chown=user . /home/user/app
-
- # Run the start script
- #CMD ["/app/start_server.sh"]
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
app.py DELETED
@@ -1,91 +0,0 @@
- import os
- #import gradio as gr
- import copy
- import time
- import llama_cpp
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
- from fastapi import FastAPI, Request
- from fastapi.middleware.cors import CORSMiddleware
- import nest_asyncio
- import uvicorn
-
- app = FastAPI()
- MODEL_NAME = "TheBloke/CodeLlama-7B-GGUF"
- PDF_PATH = "/opt/docs"
- CLASSIFIER_MODEL_NAME = "roberta-large-mnli"
-
- # Add CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=['*'],
-     allow_credentials=True,
-     allow_methods=['*'],
-     allow_headers=['*'],
- )
-
- llm = Llama(
-     model_path=hf_hub_download(
-         repo_id=os.environ.get("REPO_ID", MODEL_NAME),
-         filename=os.environ.get("MODEL_FILE", "codellama-7b.Q8_0.gguf"),
-     ),
-     n_ctx=2048,
-     n_gpu_layers=50,  # change n_gpu_layers if you have more or less VRAM
- )
-
- history = []
-
- system_message = """
- You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
- If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
- """
-
- @app.post("/predict")
- def generate_text(message, history):
-     temp = ""
-     input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
-     for interaction in history:
-         input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
-
-     input_prompt = input_prompt + str(message) + " [/INST] "
-
-     output = llm(
-         input_prompt,
-         temperature=0.15,
-         top_p=0.1,
-         top_k=40,
-         repeat_penalty=1.1,
-         max_tokens=1024,
-         stop=[
-             "<|prompter|>",
-             "<|endoftext|>",
-             "<|endoftext|> \n",
-             "ASSISTANT:",
-             "USER:",
-             "SYSTEM:",
-         ],
-         stream=True,
-     )
-     for out in output:
-         stream = copy.deepcopy(out)
-         temp += stream["choices"][0]["text"]
-         yield temp
-
-     history = ["init", input_prompt]
-
-
- # demo = gr.ChatInterface(
- #     generate_text,
- #     title="llama-cpp-python on GPU",
- #     description="Running LLM with https://github.com/abetlen/llama-cpp-python",
- #     examples=["tell me everything about llamas"],
- #     cache_examples=True,
- #     retry_btn=None,
- #     undo_btn="Delete Previous",
- #     clear_btn="Clear",
- # )
- # demo.queue(concurrency_count=1, max_size=5)
- # demo.launch()
-
- nest_asyncio.apply()
- uvicorn.run(app, port=8000)
main.py CHANGED
@@ -14,7 +14,7 @@ from langchain.prompts import PromptTemplate
  from langchain.chains import RetrievalQA
  from langchain.schema.runnable import RunnableBranch
  from langchain_core.runnables import RunnableLambda
-
+ import torch

  # Logger configuration
  logging.basicConfig(level=logging.INFO,
@@ -22,6 +22,11 @@ logging.basicConfig(level=logging.INFO,
                      datefmt='%Y-%m-%d %H:%M:%S')
  logger = logging.getLogger(__name__)

+
+ import os
+ os.system("nvidia-smi")
+ print("TORCH_CUDA", torch.cuda.is_available())
+
  # Add path to sys
  # sys.path.insert(0,'/opt/accelerate')
  # sys.path.insert(0,'/opt/uvicorn')
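The two lines appended above shell out with os.system and print to stdout. A more defensive variant (a sketch under the same assumptions, not part of the commit) captures the nvidia-smi output and routes everything through the module's existing logger:

# Sketch of an equivalent startup GPU check using subprocess and logging
# instead of os.system/print; nothing here is required by the commit itself.
import logging
import shutil
import subprocess

import torch

logger = logging.getLogger(__name__)

def log_gpu_status() -> None:
    """Log nvidia-smi output (if present) and PyTorch's view of CUDA."""
    if shutil.which("nvidia-smi"):
        result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, check=False)
        logger.info("nvidia-smi:\n%s", result.stdout.strip())
    else:
        logger.warning("nvidia-smi not found on PATH")

    if torch.cuda.is_available():
        logger.info("TORCH_CUDA available: %s", torch.cuda.get_device_name(0))
    else:
        logger.info("TORCH_CUDA not available; running on CPU")

log_gpu_status()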