import gradio as gr
import psutil
import subprocess
import time

# Placeholders for a future API-backed inference path
def generate_response_by_api(user_message):
    FinalOutput = ""
    # return FinalOutput
    pass

def custom_generate_response_by_api(cust_user_message, prompt_index, prompts_list):
    prompt, ending = prompts_list[prompt_index]  # Unpack the prompt and its ending from the provided list
    cust_user_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
    # return generate_response_by_api(cust_user_message)
    pass
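
# A minimal sketch of what the API-backed path above could look like, assuming an
# OpenAI-compatible completions endpoint. The endpoint URL, model name, and API key
# are placeholders (not part of this Space), so the sketch is left commented out:
#
#     import requests
#
#     def generate_response_by_api_sketch(user_message):
#         resp = requests.post(
#             "https://your-inference-host/v1/completions",  # hypothetical endpoint
#             headers={"Authorization": "Bearer YOUR_API_KEY"},  # placeholder credential
#             json={"model": "your-model-name", "prompt": user_message, "max_tokens": 400},
#             timeout=120,
#         )
#         resp.raise_for_status()
#         return resp.json()["choices"][0]["text"]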

#-----------------------------------------------------------------------------------------------------------------------

# Local GGUF model using llama.cpp
def generate_response(user_message):  # streams output token by token
    cmd = [
        "/app/llama.cpp/main",  # Path to the executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",
        "-e"
    ]

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
    process_monitor = psutil.Process(process.pid)  # used to sample the subprocess's CPU/memory usage

    start_time = time.time()
    monitor_start_time = time.time()
    alltokens = ""
    token_buffer = ''
    tokencount = 0
    try:
        while True:
            # Read one character at a time
            char = process.stdout.read(1)
            if char == '' and process.poll() is not None:
                break
            if char != '':
                token_buffer += char
                if char == ' ' or char == '\n':  # Token delimiters
                    elapsed_time = time.time() - start_time  # Calculate elapsed time
                    alltokens += token_buffer
                    tokencount += 1
                    yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: { tokencount }]"
                    token_buffer = ''  # Reset token buffer
            # Log resource usage every minute
            if time.time() - monitor_start_time > 60:
                cpu_usage = process_monitor.cpu_percent()
                memory_usage = process_monitor.memory_info().rss  # in bytes
                print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2:.2f} MB")
                monitor_start_time = time.time()  # Reset the timer
    
        # Yield the last token if there is any
        if token_buffer:
            elapsed_time = time.time() - start_time  # Calculate elapsed time
            alltokens += token_buffer
            yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: { round(tokencount / elapsed_time, 2) }]"

    finally:
        try:
            # Wait for the process to complete, with a timeout
            process.wait(timeout=60)  # Timeout in seconds
        except subprocess.TimeoutExpired:
            print("Process didn't complete within the timeout. Killing it.")
            process.kill()
            process.wait()  # Ensure proper cleanup

        # Check for any errors before closing the pipes (reading from a closed
        # stream would raise ValueError)
        if process.returncode != 0:
            error_message = process.stderr.read()
            print(f"Error: {error_message}")

        process.stdout.close()
        process.stderr.close()
            
def custom_generate_response(cust_user_message, prompt_index, prompts_list):
    """
    Generates a custom response based on the user message, the selected prompt,
    and the provided list of prompts, including a custom ending specific to the prompt.

    Parameters:
    - cust_user_message: The message input from the user.
    - prompt_index: The index of the custom prompt to use.
    - prompts_list: The list of prompts to use for generating the response.
    """
    prompt, ending = prompts_list[prompt_index]  # Unpack the prompt and its ending from the provided list
    cust_user_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
    yield from generate_response(cust_user_message)
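
# Illustrative example: with prompts_list = [("Summarise the following text:", "Summary:")]
# and prompt_index = 0, the wrapped prompt sent to the model is:
#
#     "Summarise the following text:\n\n<user message>\n\nSummary:"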

# Each prompt is a (prefix, ending) pair that gets wrapped around the user's text
Allprompts = {
    "Custom Prompts" : [
        ("Write a Class Diagram based on the following text:", "Class Diagram:"),
        ("Write a Pydot code based on the following text:", "Pydot Code:"),
        ("Describe what a standard happy scene in any movie would be planned in great detail, based on the following text:", "Scene Details"),
        ("Explain a teardown of the product mentioned in the following text:", "Teardown Details:"),
        ("Explain the manufacturing of the product mentioned in the following text:", "Manufacturing Details:"),
        ("Explain the marketing considerations of the product mentioned in the following text:", "Considerations:"),
        ("Explain the target users considerations of the product mentioned in the following text:", "Target Users Considerations:"),
        ("My problem to solve is", "- please make 10 sub problems have to solve from this:"),
    ],
    "Business Prompts" : [
        ("Suggest Product ideas just based off the following text:", "Products:"),
        ("Write an outline for a business plan for: " , ""),
        ("Write an example of a detailed report for a Executive Summary for " , "Executive Summary:"),
        ("Write an example of a detailed report for a Company Description for " , "Company Description:"),
        ("Write an example of a detailed report for a Market Analysis for " , "Market Analysis:"),
        ("Write an example of a detailed report for a Marketing and Sales Strategy for " , "Marketing and Sales Strategy:"),
        ("Write an example of a detailed report for a Product Development for " , "Product Development:"),
        ("Write an example of a detailed report for a Operations and Management for " , "Operations and Management:"),
        ("Write an example of a detailed report for a Financial Projections for " , "Financial Projections:"),
        ("Explain how this to make this product unique from competitors:", "Considerations:"),
    ],
    "Programming Pattern Prompts" : [
        ("Demonstrate a builder pattern in go:", ""),
        ("Demonstrate a zero cost abstractions in go:", ""),
        ("Demonstrate a builder pattern in rust:", ""),
        ("Demonstrate a Polymorphism in rust:", ""),
        ("Explain how RAII pattern affects rust:", ""),
        ("Demonstrate a builder pattern in c++:", ""),
        ("Explain when to consider using a builder pattern in go:", ""),
    ],
    "Creativity Prompts" : [
        ("Make the following text more vague:", "Vague version:"),
        ("Turn the following text into a bunch of rules:", "Rules:"),
        ("What Syllogisms can be made from this text:", "Syllogisms:"),
        ("Reimagine the following text:", ""),
    ],
    "Game Based" : [
        ("What obstacles to growth exist in the following text:", "Obstacles:"),
        ("Write a story for the basis of a random game", ""),
        ("What are common themes in games?", ""),
        ("Write Three factions and why they are at conflict based on the following text:", "Faction 1:"),
    ]
}

with gr.Blocks() as iface:
    with gr.Tab("Single prompt"):
        gr.HTML("<a href='https://huggingface.co/spaces/stabilityai/stablelm-2-1_6b-zephyr'> -- Original StabilityAI demo -- </a> | ")
        gr.Interface(
            fn=generate_response,
            inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
            outputs="text",
            title="Stable LM 2 Zephyr (1.6b) llama.cpp Interface Test (Inconsistent performance: ~100 tokens in 50 secs right after this HF space is updated, 800+ secs once the space has been open for long)",
            description="No prompt template used yet (essentially autocomplete). No message history for now. Enter your message and get a response.",
            flagging_dir="/usr/src/app/flagged",
        )

        gr.HTML("Any standard way of thinking / Repetitive idea / rule of thumb / advice can be turned into a button (In a timeline?)")
        gr.HTML("LLM powered Buttons as the new notetaking? (Youtube Video to prompt pipeline?)<br><br>List to buttons (Instead of buttons tabs and dropdowns maybe?)")

        MainOutput = gr.TextArea(placeholder='Output will show here')
        CustomButtonInput = gr.TextArea(lines=1, placeholder='Prompt goes here')

        # with gr.Accordion("Random Ideas"):
        #     with gr.Group():        
        #         for index, (prompt, _) in enumerate(CustomPrompts):
        #             button = gr.Button(prompt)
        #             # Pass CustomPrompts list as an argument
        #             button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(CustomPrompts)], outputs=MainOutput)

        # with gr.Accordion("General Product and Business based", open=False):
        #     with gr.Group():
        #         for index, (prompt, _) in enumerate(BusinessPrompts):
        #             button = gr.Button(prompt)
        #             # Pass BusinessPrompts list as an argument
        #             button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(BusinessPrompts)], outputs=MainOutput)

        # with gr.Accordion("General Programming Pattern based", open=False):
        #     with gr.Group():
        #         for index, (prompt, _) in enumerate(ProgrammingPatternPrompts):
        #             button = gr.Button(prompt)
        #             # Pass BusinessPrompts list as an argument
        #             button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(ProgrammingPatternPrompts)], outputs=MainOutput)

        # with gr.Accordion("General Creativity Pattern based", open=False):
        #     with gr.Group():
        #         for index, (prompt, _) in enumerate(CreativityPrompts):
        #             button = gr.Button(prompt)
        #             # Pass BusinessPrompts list as an argument
        #             button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(CreativityPrompts)], outputs=MainOutput)

        for category_name, category_prompts in Allprompts.items():
            with gr.Accordion(category_name, open=False):
                with gr.Group():
                    for index, (prompt, _) in enumerate(category_prompts):
                        button = gr.Button(prompt)
                        # Pass the category's prompt list (not its name) so custom_generate_response can index into it
                        button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(category_prompts)], outputs=MainOutput)


    with gr.Tab("Workflow Brainstom"):
        gr.HTML("Workflow = premeditated events --- need a timeline before prompts")

iface.queue().launch(server_name="0.0.0.0", share=True)