import gradio as gr
import subprocess
import threading
import time
def generate_response(user_message):
    # TODO: tune the generation parameters; RAM-usage reporting is sketched after this function.
    cmd = [
        "/app/llama.cpp/main",  # path to the llama.cpp executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",  # model file
        "-p", user_message,  # prompt text
        "-n", "400",  # maximum number of tokens to generate
        "-e",  # process escape sequences (\n, \t, ...) in the prompt
    ]
    # Start the subprocess.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    # llama.cpp logs verbosely to stderr; drain it on a background thread so the
    # pipe buffer cannot fill up and stall the stdout loop below.
    stderr_lines = []
    stderr_thread = threading.Thread(target=lambda: stderr_lines.extend(process.stderr), daemon=True)
    stderr_thread.start()

    start_time = time.time()
    alllines = ""
    # Yield the accumulated output each time a new line becomes available.
    for line in process.stdout:
        alllines += " " + line
        elapsed_time = time.time() - start_time  # elapsed time so far
        yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"

    # Wait for the subprocess to finish if it hasn't already.
    process.wait()
    stderr_thread.join()

    # Report any errors.
    if process.returncode != 0:
        print(f"Error: {''.join(stderr_lines)}")
def custom_generate_response(cust_user_message, builtinprompt):
    # Prepend the chosen built-in prompt, then stream the normal response.
    cust_user_message = builtinprompt + '\n\n' + cust_user_message
    yield from generate_response(cust_user_message)
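
# Quick smoke test outside the UI (hypothetical prompt text; requires the
# model path above to exist):
#
#   for chunk in custom_generate_response("a bank account", "Class Diagram for:"):
#       print(chunk)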
"""
def custom_generate_response(user_message, builtinprompt): #Figure Out the parameters later and find a way to get the ram usage
user_message = builtinprompt + '\n\n ' + user_message
cmd = [
"/app/llama.cpp/main", # Path to the executable
"-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
"-p", user_message,
"-n", "400",
"-e"
]
# Start the subprocess
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
start_time = time.time()
alllines = ""
# Yield each line of output as it becomes available
for line in process.stdout:
alllines += " " + line
elapsed_time = time.time() - start_time # Calculate elapsed time
yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
# Wait for the subprocess to finish if it hasn't already
process.wait()
# Check for any errors
if process.returncode != 0:
error_message = process.stderr.read()
print(f"Error: {error_message}")
"""
CustomPrompts = [
    "Class Diagram for:",
    "Pydot code for:",
]

# Thin generator wrappers so each button streams with its own built-in prompt.
def class_diagram_response(message):
    yield from custom_generate_response(message, CustomPrompts[0])

def pydot_response(message):
    yield from custom_generate_response(message, CustomPrompts[1])
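
# e.g. clicking the first button with input "a bank account" sends the model:
#
#   Class Diagram for:
#
#   a bank account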
with gr.Blocks() as iface:
    # Plain interface: message in, streamed response (with timing) out.
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) llama.cpp Interface Test",
        description="No message history for now - enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )

    gr.HTML()  # spacer between the interface and the custom-prompt controls
    MainOutput = gr.TextArea()
    CustomButtonInput = gr.TextArea()
    CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
    CustomButtonPydotcode = gr.Button(CustomPrompts[1])
    # Each button streams through custom_generate_response with its own built-in prompt.
    CustomButtonClassDiagram.click(class_diagram_response, inputs=[CustomButtonInput], outputs=MainOutput)
    CustomButtonPydotcode.click(pydot_response, inputs=[CustomButtonInput], outputs=MainOutput)

# Generator handlers need the queue on older Gradio versions (it is on by default in 4.x).
iface.queue().launch(server_name="0.0.0.0")  # add share=True for a public link
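
# Gradio serves on port 7860 by default, so a container running this script
# would typically be launched with something like (hypothetical image name):
#
#   docker run -p 7860:7860 stablelm-llamacpp-demo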