Tobias Bergmann committed · Commit 53cb438 · 1 Parent(s): 2e11c33

start server
app.py CHANGED
@@ -8,24 +8,22 @@ from typing import Iterator, List, Dict
 
 import requests
 import json
+import subprocess
 import gradio as gr
 
 today_date = datetime.today().strftime("%B %-d, %Y")  # noqa: DTZ002
 
-SYS_PROMPT = f"""
-Today's Date: {today_date}.
+SYS_PROMPT = f"""Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
-TITLE = "IBM Granite 3.1
+TITLE = "IBM Granite 3.1 3b a800 MoE Instruct from local GGUF server"
 DESCRIPTION = """
-<p>Granite 3.1
-or enter your own. Keep in mind that AI can occasionally make mistakes.
+<p>Granite 3.1 3b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
 </p>
 """
 LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
-MAX_INPUT_TOKEN_LENGTH = 128_000
 MAX_NEW_TOKENS = 1024
 TEMPERATURE = 0.7
 TOP_P = 0.85
@@ -39,9 +37,12 @@ gguf_path = hf_hub_download(
     local_dir="."
 )
 
-#
-
-
+# start llama-server
+subprocess.run(["chmod", "+x", "llama-server"])
+command = ["./llama-server", "-m", "granite-3.1-3b-a800m-instruct-Q8_0.gguf", "-ngl", "0", "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
+process = subprocess.Popen(command)
+print(f"Llama-server process started with PID {process.pid}")
+
 
 def generate(
     message: str,
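A note on the startup sequence added in this commit: the flags run the server CPU-only (-ngl 0 offloads no layers to GPU) with the 2K context the description mentions (-c 2048) and eight threads (-t 8). subprocess.Popen returns as soon as the child process is spawned, so the Gradio app can start serving before llama-server has finished loading the GGUF file, and the first request may fail with a connection error. A minimal readiness poll, sketched below, would close that race. This is illustrative and not part of the commit; it assumes llama-server exposes its standard GET /health endpoint on the configured port, and wait_for_server is a hypothetical helper name.

import time
import requests

def wait_for_server(base_url: str = "http://127.0.0.1:8081", timeout: float = 60.0) -> None:
    # Poll llama-server's /health endpoint until it reports ready or we time out.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # llama.cpp's server returns 200 from GET /health once the model is loaded.
            if requests.get(f"{base_url}/health", timeout=2).status_code == 200:
                return
        except requests.ConnectionError:
            pass  # server socket not open yet; keep waiting
        time.sleep(0.5)
    raise RuntimeError("llama-server did not become ready in time")

Calling wait_for_server(LLAMA_CPP_SERVER) right after subprocess.Popen(command) would let the app fail fast with a clear error instead of surfacing connection failures in the chat UI.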
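The diff is truncated at the top of generate(), the handler that relays chat messages to the local server. For context, a minimal streaming client for llama.cpp's /completion endpoint could look like the sketch below. The prompt handling and the stream_completion name are assumptions for illustration, not the actual body of generate(); the constants mirror the ones defined earlier in app.py.

import json
import requests
from typing import Iterator

LLAMA_CPP_SERVER = "http://127.0.0.1:8081"  # same values as the constants in app.py
MAX_NEW_TOKENS = 1024
TEMPERATURE = 0.7
TOP_P = 0.85

def stream_completion(prompt: str) -> Iterator[str]:
    # llama.cpp's server streams server-sent events when "stream" is true.
    payload = {
        "prompt": prompt,
        "n_predict": MAX_NEW_TOKENS,
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "stream": True,
    }
    with requests.post(f"{LLAMA_CPP_SERVER}/completion", json=payload, stream=True) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            # Each event line is "data: " followed by a JSON object with a "content" delta.
            chunk = json.loads(line.decode("utf-8").removeprefix("data: "))
            yield chunk.get("content", "")
            if chunk.get("stop"):
                break

Each yielded fragment can be accumulated and re-emitted by generate() so Gradio renders the reply token by token.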