Spaces:

080-ai
/

cutlass_v1

Runtime error

cutlass_v1 / app.py

Update app.py

35e6a4d verified about 1 year ago

1.48 kB

	import gradio as gr
	import os
	from openai import OpenAI
	import time

	# OpenAI-compatible client aimed at the RunPod endpoint; the API key is
	# read from the RUNPOD_API_KEY environment variable.
	client = OpenAI(
	    base_url="https://api.runpod.ai/v2/vllm-k0g4c60zor9xuu/openai/v1",
	    api_key=os.getenv("RUNPOD_API_KEY"),
	)

	def runpod_chat(question, history=None):
	    """Send *question* to the chat model and return its reply.

	    Args:
	        question: The user's message text.
	        history: Earlier conversation as a list of
	            ``{"role": ..., "content": ...}`` dicts, or ``None`` to start
	            a new conversation. The list passed in is not modified.

	    Returns:
	        A ``(display_text, updated_history)`` tuple. ``display_text`` is the
	        complete reply prefixed with ``"RunPod: "``; ``updated_history`` is
	        the prior history plus the new user message and one assistant
	        message holding the complete reply.
	    """
	    # Work on a copy so the caller's list is never mutated.
	    history = [] if history is None else list(history)
	    history.append({"role": "user", "content": question})

	    response_stream = client.chat.completions.create(
	        model="ambrosfitz/llama-3-history",
	        messages=history,
	        temperature=0,
	        max_tokens=150,
	        stream=True,
	    )

	    # Gather the streamed fragments. A chunk may carry no choices, or a
	    # delta whose content is None (e.g. role-only or final chunks), so
	    # those are skipped instead of being concatenated.
	    fragments = []
	    for chunk in response_stream:
	        if not chunk.choices:
	            continue
	        piece = chunk.choices[0].delta.content
	        if piece:
	            fragments.append(piece)
	    answer = "".join(fragments)

	    # Record the reply as ONE assistant message so the history sent on the
	    # next turn is a well-formed conversation.
	    history.append({"role": "assistant", "content": answer})

	    return "RunPod: " + answer + "\n", history

	# Build the Gradio interface: a question textbox plus per-session state in,
	# the reply textbox plus the updated state out.
	iface = gr.Interface(
	    fn=runpod_chat,
	    inputs=[
	        gr.Textbox(label="Enter your question:"),
	        # `value` is the documented parameter for a State's initial
	        # contents; the former `default=` keyword is not accepted by
	        # current Gradio releases.
	        gr.State(value=[]),
	    ],
	    outputs=[
	        gr.Textbox(label="Responses"),
	        gr.State(),
	    ],
	    title="RunPod Chat",
	    description="This app interfaces with RunPod's API to provide responses to your queries.",
	)

	iface.launch()