Tobias Bergmann committed
Commit 53cb438 · 1 Parent(s): 2e11c33

start server

Files changed (1): app.py +10 -9
app.py CHANGED
@@ -8,24 +8,22 @@ from typing import Iterator, List, Dict
 
 import requests
 import json
+import subprocess
 import gradio as gr
 
 today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
 
-SYS_PROMPT = f"""Knowledge Cutoff Date: April 2024.
-Today's Date: {today_date}.
+SYS_PROMPT = f"""Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
-TITLE = "IBM Granite 3.1 8b Instruct from local GGUF server"
+TITLE = "IBM Granite 3.1 3b a800 MoE Instruct from local GGUF server"
 DESCRIPTION = """
-<p>Granite 3.1 8b instruct is an open-source LLM supporting a 128k context window. Start with one of the sample prompts
-or enter your own. Keep in mind that AI can occasionally make mistakes.
+<p>Granite 3.1 3b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
 </p>
 """
 LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
-MAX_INPUT_TOKEN_LENGTH = 128_000
 MAX_NEW_TOKENS = 1024
 TEMPERATURE = 0.7
 TOP_P = 0.85
@@ -39,9 +37,12 @@ gguf_path = hf_hub_download(
     local_dir="."
 )
 
-# TODO: chmod llama-server
-# TODO: start llama-server
-# ./llama-server -m granite-3.1-3b-a800m-instruct-Q8_0.gguf -ngl 0 --temp 0.0 -c 2048 -t 8 --port 8081
+# start llama-server
+subprocess.run(["chmod", "+x", "llama-server"])
+command = ["./llama-server", "-m", "granite-3.1-3b-a800m-instruct-Q8_0.gguf", "-ngl", "0", "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
+process = subprocess.Popen(command)
+print(f"Llama-server process started with PID {process.pid}")
+
 
 def generate(
     message: str,
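
Note on the new startup code: subprocess.Popen returns as soon as the llama-server process is spawned, not when the model has finished loading, so the first chat request from the Gradio app can race server startup. Below is a minimal readiness-poll sketch, not part of this commit; it assumes the GET /health endpoint that llama.cpp's llama-server exposes (503 while the model loads, 200 once ready), which is worth verifying against the bundled server build.

import time
import requests

def wait_for_server(base_url: str, timeout_s: float = 120.0) -> None:
    """Block until llama-server reports ready, or raise after timeout_s."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            # /health is llama.cpp's readiness probe (assumed here).
            if requests.get(f"{base_url}/health", timeout=2).status_code == 200:
                return
        except requests.ConnectionError:
            pass  # socket not open yet; keep polling
        time.sleep(0.5)
    raise RuntimeError(f"llama-server at {base_url} not ready after {timeout_s}s")

# Usage, right after subprocess.Popen(command):
# wait_for_server(LLAMA_CPP_SERVER)

Relatedly, subprocess.run(["chmod", "+x", "llama-server"]) silently ignores a failed chmod; passing check=True (or using os.chmod) would surface the error before the server start is attempted.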
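
The generate function is cut off in this hunk. For orientation, here is a hedged sketch of how the module's constants could drive a streaming request against the local server; it assumes llama.cpp's OpenAI-compatible /v1/chat/completions endpoint, and the function name and wiring are illustrative rather than the commit's actual implementation.

import json
from typing import Iterator
import requests

def stream_chat(message: str, system_prompt: str = SYS_PROMPT) -> Iterator[str]:
    """Yield response fragments streamed from the local llama-server."""
    payload = {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": message},
        ],
        "max_tokens": MAX_NEW_TOKENS,
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "stream": True,
    }
    with requests.post(f"{LLAMA_CPP_SERVER}/v1/chat/completions",
                       json=payload, stream=True, timeout=300) as resp:
        resp.raise_for_status()
        # Server-sent events: each data line carries one JSON delta chunk.
        for raw in resp.iter_lines():
            if not raw or not raw.startswith(b"data: "):
                continue
            chunk = raw[len(b"data: "):]
            if chunk == b"[DONE]":
                break
            delta = json.loads(chunk)["choices"][0]["delta"]
            if "content" in delta:
                yield delta["content"]

Since the server was launched with -c 2048, any client-side prompt truncation should target that 2K window rather than the model's full 128K context.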