import gradio as gr
import psutil
import subprocess
import time
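
# Streams completions from a local llama.cpp binary into a Gradio UI,
# reporting token counts, inference time, and subprocess resource usage.
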
def generate_response(user_message):
    """Stream a llama.cpp completion, yielding the accumulated output plus
    timing stats after each whitespace-delimited chunk."""
    cmd = [
        "/app/llama.cpp/main",  # Path to the llama.cpp executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",  # Maximum number of tokens to generate
        "-e"  # Process escape sequences (e.g. \n) in the prompt
    ]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
    process_monitor = psutil.Process(process.pid)
    process_monitor.cpu_percent()  # Prime the counter; psutil's first call always returns 0.0
    start_time = time.time()
    monitor_start_time = time.time()
    alltokens = ""
    token_buffer = ''
    tokencount = 0
    try:
        while True:
            # Read one character at a time so output streams as soon as it arrives
            char = process.stdout.read(1)
            if char == '' and process.poll() is not None:
                break
            if char != '':
                token_buffer += char
                if char == ' ' or char == '\n':  # Treat whitespace as a pseudo-token boundary
                    elapsed_time = time.time() - start_time  # Calculate elapsed time
                    alltokens += token_buffer
                    tokencount += 1
                    yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: {tokencount}]"
                    token_buffer = ''  # Reset token buffer
            # Log resource usage every minute
            if time.time() - monitor_start_time > 60:
                cpu_usage = process_monitor.cpu_percent()
                memory_usage = process_monitor.memory_info().rss  # in bytes
                print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2:.1f} MB")
                monitor_start_time = time.time()  # Reset the timer
        # Flush any remaining partial token
        if token_buffer:
            elapsed_time = time.time() - start_time  # Calculate elapsed time
            alltokens += token_buffer
            tokencount += 1
            yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average tokens per second: {round(tokencount / elapsed_time, 2)}]"
    finally:
        try:
            # Wait for the process to complete, with a timeout
            process.wait(timeout=60)  # Timeout in seconds
        except subprocess.TimeoutExpired:
            print("Process didn't complete within the timeout. Killing it.")
            process.kill()
            process.wait()  # Ensure proper cleanup
        # Read any error output before closing the pipes (reading a closed
        # pipe would raise ValueError)
        if process.returncode != 0:
            error_message = process.stderr.read()
            print(f"Error: {error_message}")
        process.stdout.close()
        process.stderr.close()
def custom_generate_response(cust_user_message, prompt_index):
    """
    Generates a custom response based on the user message and the selected prompt,
    including a custom ending specific to the prompt.

    Parameters:
    - cust_user_message: The message input from the user.
    - prompt_index: The index of the custom prompt to use.
    """
    prompt, ending = CustomPrompts[prompt_index]  # Unpack the prompt and its ending
    cust_user_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
    yield from generate_response(cust_user_message)
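
# Example (hypothetical input): custom_generate_response("a cordless drill", 3)
# composes "Explain a teardown of the product mentioned in the following
# text:\n\na cordless drill\n\nTeardown Details:" and streams the result.
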
CustomPrompts = [
    ("Write a Class Diagram based on the following text:", "Class Diagram:"),
    ("Write Pydot code based on the following text:", "Pydot Code:"),
    ("Describe how a standard happy scene in any movie would be planned in great detail, based on the following text:", "Scene Details:"),
    ("Explain a teardown of the product mentioned in the following text:", "Teardown Details:"),
    ("Explain the manufacturing of the product mentioned in the following text:", "Manufacturing Details:"),
    ("Explain the marketing considerations of the product mentioned in the following text:", "Considerations:"),
    ("Explain the target-user considerations of the product mentioned in the following text:", "Target User Considerations:"),
]
with gr.Blocks() as iface:
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6B) llama.cpp Interface Test (Inconsistent Performance - 100 tokens in 50 secs or 800+ secs)",
        description="No prompt template used yet (essentially autocomplete). No message history for now - enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )
    with gr.Group():
        gr.HTML("Test for wrapping the generator (instead of buttons, tabs, and dropdowns?)")
        MainOutput = gr.TextArea(placeholder='Output will show here')
        CustomButtonInput = gr.TextArea(lines=1, placeholder='Prompt goes here')

        # Dynamically create one button per prompt and wire it to the shared
        # handler; gr.State(index) pins each button's prompt index so every
        # click passes the right (prompt, ending) tuple
        for index, (prompt, _) in enumerate(CustomPrompts):
            button = gr.Button(prompt)
            button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index)], outputs=MainOutput)

iface.queue().launch(server_name="0.0.0.0", share=True)