Christoph Holthaus committed on
Commit 2ae746c · 1 Parent(s): ab2d9f0

took a lot from https://huggingface.co/spaces/imperialwool/llama-cpp-api/blob/main

Files changed (4)
  1. .gitignore +162 -0
  2. Dockerfile +26 -65
  3. gradio_app.py +86 -0
  4. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1,162 @@
+ # BASIC PYTHON .GITIGNORE + some for testing
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
Dockerfile CHANGED
@@ -1,73 +1,34 @@
- # Dockerfile to deploy a llama-cpp container with conda-ready environments
-
- # docker pull continuumio/miniconda3:latest
-
- ARG TAG=latest
- FROM continuumio/miniconda3:$TAG
-
- RUN apt-get update \
- && DEBIAN_FRONTEND="noninteractive" apt-get install -y --no-install-recommends \
- git \
- locales \
- sudo \
- build-essential \
- dpkg-dev \
- wget \
- openssh-server \
- nano \
- && rm -rf /var/lib/apt/lists/*
-
- # Setting up locales
- RUN locale-gen en_US.UTF-8
- ENV LANG en_US.UTF-8
-
- # SSH exposition
- EXPOSE 22/tcp
- RUN service ssh start
-
- # Create user
- RUN groupadd --gid 1020 llama-cpp-group
- RUN useradd -rm -d /home/llama-cpp-user -s /bin/bash -G users,sudo,llama-cpp-group -u 1000 llama-cpp-user
-
- # Update user password
- RUN echo 'llama-cpp-user:admin' | chpasswd
-
- # Updating conda to the latest version
- RUN conda update conda -y
-
- # Create virtalenv
- RUN conda create -n llamacpp -y python=3.10.6
-
- # Adding ownership of /opt/conda to $user
- RUN chown -R llama-cpp-user:users /opt/conda
-
- # conda init bash for $user
- RUN su - llama-cpp-user -c "conda init bash"
-
- # Download latest github/llama-cpp in llama.cpp directory and compile it
- RUN su - llama-cpp-user -c "git clone https://github.com/ggerganov/llama.cpp.git ~/llama.cpp \
- && cd ~/llama.cpp \
- && make "
-
- # Install Requirements for python virtualenv
- RUN su - llama-cpp-user -c "cd ~/llama.cpp \
- && conda activate llamacpp \
- && python3 -m pip install -r requirements.txt "
-
- # Download model
- RUN su - llama-cpp-user -c "https://github.com/facebookresearch/llama.git ~/llama \
- && cd ~/llama \
- && ./download.sh "
-
- # COPY entrypoint.sh /usr/bin/entrypoint
- # RUN chmod 755 /usr/bin/entrypoint
- # ENTRYPOINT ["/usr/bin/entrypoint"]
-
- # Preparing for login
- ENV HOME /home/llama-cpp-user
- WORKDIR ${HOME}/llama.cpp
- USER llama-cpp-user
- CMD ["/bin/bash"]

+ # Base image. Debian-based here; use whatever you prefer.
+ FROM python:3.11.5-slim-bookworm
+
+ # Run as root so the build steps just work.
+ USER root
+
+ # Install the compiler toolchain and build llama-cpp-python against OpenBLAS.
+ RUN apt update && apt install gcc cmake build-essential -y
+ RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
+
+ # Copy the project files into /app and make it the working directory.
+ RUN mkdir app
+ COPY . /app
+ RUN chmod -R 777 /app
+ WORKDIR /app
+
+ # Download the model into the image.
+ ADD https://huggingface.co/TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF/resolve/main/dolphin-2.2.1-ashhlimarp-mistral-7b.Q4_K_M.gguf /app/model.bin
+ RUN chmod -R 777 /app/model.bin
+ # You can use other models: comment out these two lines and ship your own model named "model.bin" in the Space/repo/Docker image instead.
+
+ # Avoid warnings from Transformers and Matplotlib about unwritable cache/config dirs.
+ RUN mkdir -p /.cache/huggingface/hub -m 777
+ RUN mkdir -p /.config/matplotlib -m 777
+ RUN chmod -R 777 /.cache
+ RUN chmod -R 777 /.config
+
+ # Update pip and install everything from requirements.txt.
+ RUN python3 -m pip install -U pip setuptools wheel
+ RUN pip install --upgrade -r /app/requirements.txt
+
+ # Run the Gradio app.
+ CMD ["python", "gradio_app.py"]
gradio_app.py ADDED
@@ -0,0 +1,86 @@
+ # Imports
+ from llama_cpp import Llama
+ from time import time
+ import gradio as gr
+ import psutil
+
+ # Initialise the llama.cpp model
+ print("! INITING LLAMA MODEL !")
+ llm = Llama(model_path="./model.bin")  # GGUF model downloaded in the Dockerfile
+ llama_model_name = "TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF"
+ print("! INITING DONE !")
+
+ # UI text
+ title = "llama.cpp API"
+ desc = '''<h1>Hello, world!</h1>
+ This is a showcase of how to run your own server around a Llama 2 model.<br>
+ A 7B model is used here as an example, running on CPU only.<br>
+ But you can use GPU power as well!<br><br>
+ <h1>How to use a GPU?</h1>
+ Change <code>CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"</code> in the Dockerfile to <code>CMAKE_ARGS="-DLLAMA_CUBLAS=on"</code>. You can also try <code>-DLLAMA_CLBLAST</code> or <code>-DLLAMA_METAL</code>.<br><br>
+ <h1>How to test it on your own machine?</h1>
+ Install Docker, build the image and run it (<code>run-docker.sh</code> is provided). To stop the container, run <code>docker ps</code>, find the container name and run <code>docker stop _dockerContainerName_</code>.<br>
+ Alternatively, follow the steps in the Dockerfile directly on your machine, without Docker.<br>
+ <br>''' + f"Memory used: {psutil.virtual_memory()[2]}%<br>" + '''
+ Powered by <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a> and <a href="https://www.gradio.app/">Gradio</a>.<br><br>'''
+
+ # Prompt template and default system message (left empty here)
+ prompt = ""
+ system_message = ""
+
+ def generate_answer(request: str, max_tokens: int = 256, custom_prompt: str = None):
+     t0 = time()
+     logs = f"Request: {request}\nMax tokens: {max_tokens}\nCustom prompt: {custom_prompt}\n"
+     try:
+         maxTokens = max_tokens if 16 <= max_tokens <= 256 else 64
+         userPrompt = prompt.replace("{prompt}", request)
+         userPrompt = userPrompt.replace(
+             "{system_message}",
+             custom_prompt if isinstance(custom_prompt, str) and len(custom_prompt.strip()) > 1 else system_message
+         )
+         logs += f"\nFinal prompt: {userPrompt}\n"
+     except Exception:
+         return "Not enough data! Check that you passed all needed data.", logs
+
+     try:
+         # Retry a few times: the model occasionally returns an empty completion.
+         counter = 1
+         while counter <= 3:
+             logs += f"Attempt {counter} to generate answer...\n"
+             output = llm(userPrompt, max_tokens=maxTokens, stop=["<|im_end|>"], echo=False)
+             text = output["choices"][0]["text"]
+             if len(text.strip()) > 1:
+                 break
+             counter += 1
+         logs += f"Final attempt: {counter}\n"
+         if len(text.strip()) <= 1:
+             logs += f"Generated and aborted: {text}"
+             text = "Sorry, but something went wrong while generating the answer. Try again or fix the code. If you are the maintainer of this Space, check the logs."
+
+         logs += f"\nFinal: '''{text}'''"
+         logs += f"\n\nTime spent: {time()-t0}"
+         return text, logs
+     except Exception as e:
+         logs += str(e)
+         logs += f"\n\nTime spent: {time()-t0}"
+         return "Oops! Internal server error. Check the logs of the Space/instance.", logs
+
+ print("! LOAD GRADIO INTERFACE !")
+ demo = gr.Interface(
+     fn=generate_answer,
+     inputs=[
+         gr.components.Textbox(label="Input"),
+         gr.components.Number(label="Max tokens", value=256),
+         gr.components.Textbox(label="Custom system prompt"),
+     ],
+     outputs=[
+         gr.components.Textbox(label="Output"),
+         gr.components.Textbox(label="Logs")
+     ],
+     title=title,
+     description=desc,
+     allow_flagging='never'
+ )
+ demo.queue()
+ print("! LAUNCHING GRADIO !")
+ demo.launch(server_name="0.0.0.0")
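
Once the container is up, the Interface above can also be called programmatically. The following is a rough sketch using the gradio_client package (installed separately); it assumes the default Gradio port 7860 is reachable from the host and relies on Gradio's default endpoint name /predict for a single Interface. The input values are placeholders.

# Example client call against the running app (hypothetical host/port).
from gradio_client import Client

client = Client("http://localhost:7860/")
answer, logs = client.predict(
    "Write a short greeting.",       # Input textbox
    128,                             # max tokens (values outside 16-256 fall back to 64 in the app)
    "You are a helpful assistant.",  # custom system prompt
    api_name="/predict",
)
print(answer)
print(logs)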
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ psutil
+ gradio