import gradio as gr
import subprocess
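
# Minimal Gradio front end that shells out to a local llama.cpp build.
# The two triple-quoted blocks below are earlier iterations of generate_response,
# kept here for reference: the first drove the model through an in-process `llm`
# object (a llama-cpp-python-style binding), and the second ran the llama.cpp
# binary with subprocess.run, returning the whole response at once instead of
# streaming it.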

"""
def generate_response(user_message):
    encodeduserm = b"### Human: " + user_message.encode('utf-8') + b"\n### Assistant:"
    tokens = llm.tokenize(encodeduserm)
    output = b""
    count = 0

    for token in llm.generate(tokens, top_k=40, top_p=0.95, temp=0.72, repeat_penalty=1.1):
        text = llm.detokenize([token])
        output += text
        count += 1
        if count >= 500 or (token == llm.token_eos()):
            break
    return output.decode()
"""

"""
def generate_response(user_message):
    print("Before request")
    cmd = [
        "/app/llama.cpp/main",  # Path to the executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",
        "-e"
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    print("After response")
    return result.stdout
"""

def generate_response(user_message):
    print("Before request")
    cmd = [
        "/app/llama.cpp/main",
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,  # prompt
        "-n", "400",         # maximum number of tokens to generate
        "-e"                 # process escape sequences in the prompt
    ]

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    # Gradio replaces the displayed text with each value yielded by a generator,
    # so accumulate the lines and yield the running transcript to stream the reply.
    output = ""
    for line in process.stdout:
        output += line
        yield output

    process.wait()
    print("After response")

    if process.returncode != 0:
        error_message = process.stderr.read()
        print(f"Error: {error_message}")

iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
    outputs="text",
    title="LLaMA Chat Interface",
    description="Enter your message and get a response from the LLaMA model.",
    flagging_dir="/usr/src/app/flagged",
)

iface.launch(server_name="0.0.0.0")
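
# server_name="0.0.0.0" makes the app reachable from outside the container. If a
# fixed port is needed, it can be pinned with the server_port argument, e.g.
# iface.launch(server_name="0.0.0.0", server_port=7860), 7860 being Gradio's default.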