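# Gradio front end that streams output from a local llama.cpp build: each user
# message is passed as a prompt to the `main` binary with the Stable LM 2
# Zephyr 1.6B GGUF model, and the binary's stdout is streamed back into the UI.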
import gradio as gr
import subprocess
import time

def generate_response(user_message):  # TODO: expose generation parameters in the UI and report RAM usage
    cmd = [
        "/app/llama.cpp/main",  # path to the llama.cpp executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",  # model file
        "-p", user_message,  # prompt
        "-n", "400",         # maximum number of tokens to generate
        "-e"                 # process escape sequences (\n, \t, ...) in the prompt
    ]

    # Start the subprocess
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    start_time = time.time()
    alllines = ""

    # Yield each line of output as it becomes available
    for line in process.stdout:
        alllines += " " + line
        elapsed_time = time.time() - start_time  # Calculate elapsed time
        yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
    
    # Wait for the subprocess to finish if it hasn't already
    process.wait()

    # Check for any errors
    if process.returncode != 0:
        error_message = process.stderr.read()
        print(f"Error: {error_message}")

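# Sketch for the RAM-usage TODO above: a helper (illustrative name, not yet wired
# into the UI) that reads the llama.cpp subprocess's resident set size via the
# optional `psutil` package, e.g. get_process_ram_mb(process.pid) inside the
# streaming loop.
def get_process_ram_mb(pid):
    import psutil  # assumed to be available in the image; not a current dependency
    return psutil.Process(pid).memory_info().rss / (1024 * 1024)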

def custom_generate_response(cust_user_message):
    # Handler for the "Class Diagram for:" button.
    yield from generate_response(CustomPrompts[0] + '\n\n' + cust_user_message)

def custom_generate_pydot_response(cust_user_message):
    # Handler for the "Pydot code for:" button.
    yield from generate_response(CustomPrompts[1] + '\n\n' + cust_user_message)

"""
def custom_generate_response(user_message, builtinprompt): #Figure Out the parameters later and find a way to get the ram usage
    user_message = builtinprompt + '\n\n ' + user_message

    cmd = [
        "/app/llama.cpp/main",  # Path to the executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",
        "-e"
    ]

    # Start the subprocess
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    start_time = time.time()
    alllines = ""

    # Yield each line of output as it becomes available
    for line in process.stdout:
        alllines += " " + line
        elapsed_time = time.time() - start_time  # Calculate elapsed time
        yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
    
    # Wait for the subprocess to finish if it hasn't already
    process.wait()

    # Check for any errors
    if process.returncode != 0:
        error_message = process.stderr.read()
        print(f"Error: {error_message}")
"""

CustomPrompts = [
    "Class Diagram for:",
    "Pydot code for:",
]
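# Each entry above is surfaced as a button; adding a new prompt also requires a
# matching button and click handler in the Blocks layout below.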

with gr.Blocks() as iface:
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test",
        description="No Message History for now - Enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )
    gr.HTML()
    MainOutput = gr.TextArea(label="Model output")
    CustomButtonInput = gr.TextArea(label="Prompt for the custom buttons")
    CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
    CustomButtonPydotcode = gr.Button(CustomPrompts[1])
    CustomButtonClassDiagram.click(custom_generate_response, inputs=[CustomButtonInput], outputs=MainOutput)
    CustomButtonPydotcode.click(custom_generate_pydot_response, inputs=[CustomButtonInput], outputs=MainOutput)

iface.launch(server_name="0.0.0.0")  # listen on all interfaces (default port 7860); pass share=True for a public link