Tobias Bergmann committed · Commit 53cb438 · 1 Parent(s): 2e11c33

start server
app.py CHANGED
@@ -8,24 +8,22 @@ from typing import Iterator, List, Dict
 
 import requests
 import json
+import subprocess
 import gradio as gr
 
 today_date = datetime.today().strftime("%B %-d, %Y")  # noqa: DTZ002
 
-SYS_PROMPT = f"""
-Today's Date: {today_date}.
+SYS_PROMPT = f"""Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
-TITLE = "IBM Granite 3.1
+TITLE = "IBM Granite 3.1 3b a800 MoE Instruct from local GGUF server"
 DESCRIPTION = """
-<p>Granite 3.1
-or enter your own. Keep in mind that AI can occasionally make mistakes.
+<p>Granite 3.1 3b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
 </p>
 """
 LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
-MAX_INPUT_TOKEN_LENGTH = 128_000
 MAX_NEW_TOKENS = 1024
 TEMPERATURE = 0.7
 TOP_P = 0.85
@@ -39,9 +37,12 @@ gguf_path = hf_hub_download(
     local_dir="."
 )
 
-#
-
-
+# start llama-server
+subprocess.run(["chmod", "+x", "llama-server"])
+command = ["./llama-server", "-m", "granite-3.1-3b-a800m-instruct-Q8_0.gguf", "-ngl", "0", "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
+process = subprocess.Popen(command)
+print(f"Llama-server process started with PID {process.pid}")
+
 
 def generate(
     message: str,
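A note on the startup sequence added in this commit: the flags run the server CPU-only (-ngl 0 offloads no layers to GPU) with the 2K context the description mentions (-c 2048) and eight threads (-t 8). subprocess.Popen returns as soon as the child process is spawned, so the Gradio app can start serving before llama-server has finished loading the GGUF file, and the first request may fail with a connection error. A minimal readiness poll, sketched below, would close that race. This is illustrative and not part of the commit; it assumes llama-server exposes its standard GET /health endpoint on the configured port, and wait_for_server is a hypothetical helper name.

import time
import requests

def wait_for_server(base_url: str = "http://127.0.0.1:8081", timeout: float = 60.0) -> None:
    # Poll llama-server's /health endpoint until it reports ready or we time out.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # llama.cpp's server returns 200 from GET /health once the model is loaded.
            if requests.get(f"{base_url}/health", timeout=2).status_code == 200:
                return
        except requests.ConnectionError:
            pass  # server socket not open yet; keep waiting
        time.sleep(0.5)
    raise RuntimeError("llama-server did not become ready in time")

Calling wait_for_server(LLAMA_CPP_SERVER) right after subprocess.Popen(command) would let the app fail fast with a clear error instead of surfacing connection failures in the chat UI.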
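The diff is truncated at the top of generate(), the handler that relays chat messages to the local server. For context, a minimal streaming client for llama.cpp's /completion endpoint could look like the sketch below. The prompt handling and the stream_completion name are assumptions for illustration, not the actual body of generate(); the constants mirror the ones defined earlier in app.py.

import json
import requests
from typing import Iterator

LLAMA_CPP_SERVER = "http://127.0.0.1:8081"  # same values as the constants in app.py
MAX_NEW_TOKENS = 1024
TEMPERATURE = 0.7
TOP_P = 0.85

def stream_completion(prompt: str) -> Iterator[str]:
    # llama.cpp's server streams server-sent events when "stream" is true.
    payload = {
        "prompt": prompt,
        "n_predict": MAX_NEW_TOKENS,
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "stream": True,
    }
    with requests.post(f"{LLAMA_CPP_SERVER}/completion", json=payload, stream=True) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            # Each event line is "data: " followed by a JSON object with a "content" delta.
            chunk = json.loads(line.decode("utf-8").removeprefix("data: "))
            yield chunk.get("content", "")
            if chunk.get("stop"):
                break

Each yielded fragment can be accumulated and re-emitted by generate() so Gradio renders the reply token by token.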