import gradio as gr
import subprocess
import time

def generate_response(user_message):
    # Run llama.cpp on the prompt and stream whitespace-delimited tokens back
    # to the caller as they are produced.
    cmd = [
        "/app/llama.cpp/main",  # Path to the executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",
        "-e"
    ]

    # Line-buffered text pipes; stdout is read below one character at a time so
    # tokens can be streamed to the UI as soon as llama.cpp emits them.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)

    start_time = time.time()
    alltokens = ""
    token_buffer = ''
    while True:
        # Read one character at a time
        char = process.stdout.read(1)
        if char == '' and process.poll() is not None:
            break
        if char != '':
            token_buffer += char
            if char == ' ' or char == '\n':  # Token delimiters
                elapsed_time = time.time() - start_time  # Calculate elapsed time
                alltokens += token_buffer
                yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds]"
                token_buffer = ''  # Reset token buffer
    
    # Yield the last token if there is any
    if token_buffer:
        elapsed_time = time.time() - start_time  # Calculate elapsed time
        alltokens += token_buffer
        yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds]"

    # Wait for the subprocess to finish if it hasn't already
    process.wait()

    # Check for any errors
    if process.returncode != 0:
        error_message = process.stderr.read()
        print(f"Error: {error_message}")


def custom_generate_response(cust_user_message):
    cust_user_message = CustomPrompts[0] + '\n\n' + cust_user_message + '\n\n'
    yield from generate_response(cust_user_message)

def custom_generate_response1(cust_user_message):
    cust_user_message = CustomPrompts[1] + '\n\n' + cust_user_message + '\n\n'
    yield from generate_response(cust_user_message)

def custom_generate_response2(cust_user_message):
    cust_user_message = CustomPrompts[2] + '\n\n' + cust_user_message + '\n\n'
    yield from generate_response(cust_user_message)

CustomPrompts = [
    "Write a Class Diagram based on the following text:",
    "Write a Pydot code based on the following text:",
    "Describe what a standard happy scene in any movie would be planned in great detail, based on the following text:",
]
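

# Sketch (an assumption, not used by the Blocks UI below): the three
# custom_generate_response* wrappers above differ only in which CustomPrompts
# entry they prepend, so a single factory could produce them instead.
def make_custom_response(prompt):
    def respond(cust_user_message):
        yield from generate_response(prompt + '\n\n' + cust_user_message + '\n\n')
    return respond
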

with gr.Blocks() as iface: 
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test",
        description="No Prompt template used yet (Essentially autocomplete). No Message History for now - Enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )
    #gr.Interface(fn=generate_response_token_by_token, inputs=gr.Textbox(lines=2, placeholder='Type prompt here...'), outputs="text", description="More Responsive streaming test")
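    # Preset-prompt test: each button prepends one CustomPrompts entry to the
    # shared input box and streams the wrapped response into MainOutput.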
    with gr.Group():
        gr.HTML("Test for wrapping generator (Instead of buttons tabs and dropdowns?)")
        MainOutput = gr.TextArea(placeholder='Output will show here')
        CustomButtonInput = gr.TextArea(lines=1, placeholder='Prompt goes here')
        CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
        CustomButtonPydotcode = gr.Button(CustomPrompts[1])
        CustomButtonHappyMovieScene = gr.Button(CustomPrompts[2])
        CustomButtonClassDiagram.click(custom_generate_response, inputs=[CustomButtonInput], outputs=MainOutput)
        CustomButtonPydotcode.click(custom_generate_response1, inputs=[CustomButtonInput], outputs=MainOutput)
        CustomButtonHappyMovieScene.click(custom_generate_response2, inputs=[CustomButtonInput], outputs=MainOutput)

iface.queue().launch(server_name="0.0.0.0", share=True)