Spaces:

TomBombadyl
/

Omniverse_Developer_Assistant

Running

App Files Files Community

TomBombadyl committed 10 days ago

Commit

5276ce7

verified ·

1 Parent(s): 2175d14

Create app.py

Browse files

Files changed (1) hide show

app.py +193 -0

app.py ADDED Viewed

	@@ -0,0 +1,193 @@

+import os
+import json
+import time
+import requests
+import gradio as gr
+# Read secrets and sanitize URL (surrounding whitespace and trailing slashes are removed).
+# An unset or empty ENDPOINT_URL variable falls back to the hard-coded endpoint below.
+ENDPOINT_URL = (os.environ.get("ENDPOINT_URL") or "https://erxvjreo1onxvdf7.us-east4.gcp.endpoints.huggingface.cloud").strip().rstrip("/")
+# HF_TOKEN is optional: an unset variable becomes the empty string.
+HF_TOKEN = (os.environ.get("HF_TOKEN") or "").strip()
+# Debug logging (runs once at import; reports only whether a token exists, never its value)
+print(f"🚀 DEBUG: ENDPOINT_URL set to: {ENDPOINT_URL}")
+print(f"🚀 DEBUG: HF_TOKEN present: {'Yes' if HF_TOKEN else 'No'}")
+# NOTE(review): because of the hard-coded fallback above, this guard can only fire when
+# ENDPOINT_URL is set to a value that sanitizes to "" (e.g. only whitespace or only slashes).
+if not ENDPOINT_URL:
+	raise RuntimeError("Missing ENDPOINT_URL Space secret")
+# Headers sent with every request to the endpoint.
+HEADERS = {
+	"Content-Type": "application/json",
+	"Accept": "application/json",
+}
+# Attach the bearer token only when one was provided.
+if HF_TOKEN:
+	HEADERS["Authorization"] = f"Bearer {HF_TOKEN}"
+# Default system message: pre-fills the "System prompt" textbox and is used by
+# build_messages when the supplied system prompt is blank.
+SYSTEM_PROMPT_DEFAULT = (
+	"You are a helpful AI assistant for Isaac Sim 5.0, Isaac Lab 2.1, and Omniverse Kit 107.3 robotics development. "
+	"You specialize in NVIDIA robotics development, computer vision, sensor integration, and simulation workflows. "
+	"Provide practical, code-focused guidance with complete examples and best practices."
+)
+# Initial values of the max_new_tokens / max_input_tokens sliders.
+DEFAULT_MAX_NEW_TOKENS = 1024
+DEFAULT_MAX_INPUT_TOKENS = 2048
+def to_single_turn(messages):
+	"""Flatten role/content message dicts into one newline-joined "Role: content" transcript that ends with an "Assistant:" cue line."""
+	# A missing role is treated as "user"; a missing content as the empty string.
+	transcript = [
+		f"{msg.get('role', 'user').capitalize()}: {msg.get('content','')}"
+		for msg in messages
+	]
+	# The final line carries no content (presumably so the model continues as the assistant).
+	return "\n".join(transcript + ["Assistant:"])
+def _redact_headers(headers):
+	"""Return a copy of *headers* with the Authorization value masked, so it is safe to print."""
+	return {
+		name: ("<redacted>" if name.lower() == "authorization" else value)
+		for name, value in headers.items()
+	}
+def _extract_generated_text(resp):
+	"""Return the generated text carried by a 200 response, or "" when there is none."""
+	try:
+		data = resp.json()
+	except ValueError:
+		# A 200 whose body is not JSON: hand back the raw text rather than raising.
+		return resp.text or ""
+	# Unwrap the list-of-dicts shape ([{"generated_text": ...}]) before the dict lookup.
+	if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
+		data = data[0]
+	text = data.get("generated_text") if isinstance(data, dict) else str(data)
+	return text or ""
+def call_endpoint(messages, parameters):
+	"""Send the conversation to the inference endpoint and return (reply_text, latency_seconds).
+	The flattened single-prompt payload ({"inputs", "parameters"}) is tried first; on a
+	non-200 status the chat payload ({"messages", "parameters"}) is tried as a fallback.
+	Failures are reported in the returned text instead of being raised, so the caller can
+	display them. Latency is wall-clock time measured from the start of the first request.
+	"""
+	start = time.time()
+	# Debug logging (the bearer token is masked so it never reaches the logs)
+	print(f"🔍 DEBUG: Calling endpoint: {ENDPOINT_URL}")
+	print(f"🔍 DEBUG: Headers: {_redact_headers(HEADERS)}")
+	# Prefer single-turn first (matches your handler expectations)
+	payload_inputs = {"inputs": to_single_turn(messages), "parameters": parameters}
+	print(f"🔍 DEBUG: Payload: {payload_inputs}")
+	try:
+		resp = requests.post(ENDPOINT_URL, headers=HEADERS, json=payload_inputs, timeout=120)
+	except requests.RequestException as exc:
+		# Timeout/connection failure: a different payload shape will not help, so stop here.
+		print(f"🔍 DEBUG: Request failed: {exc}")
+		return f"Request failed: {exc}", time.time() - start
+	latency = time.time() - start
+	print(f"🔍 DEBUG: Response status: {resp.status_code}")
+	print(f"🔍 DEBUG: Response body: {resp.text}")
+	if resp.status_code == 200:
+		return _extract_generated_text(resp), latency
+	# Fallback to messages for servers that support chat
+	print(f"🔍 DEBUG: First attempt failed, trying messages format...")
+	try:
+		resp2 = requests.post(
+			ENDPOINT_URL,
+			headers=HEADERS,
+			json={"messages": messages, "parameters": parameters},
+			timeout=120,
+		)
+	except requests.RequestException as exc:
+		print(f"🔍 DEBUG: Fallback request failed: {exc}")
+		return f"HTTP {resp.status_code}: {resp.text} (fallback request failed: {exc})", time.time() - start
+	latency = time.time() - start
+	print(f"🔍 DEBUG: Fallback response status: {resp2.status_code}")
+	print(f"🔍 DEBUG: Fallback response body: {resp2.text}")
+	if resp2.status_code == 200:
+		return _extract_generated_text(resp2), latency
+	# Both attempts failed: report both status codes and the first non-empty body.
+	return f"HTTP {resp.status_code}/{resp2.status_code}: {resp.text or resp2.text}", latency
+def build_messages(chat_history, user_input, system_prompt):
+	"""Assemble the role/content message list: one system message, then the non-empty sides of each (user, assistant) history pair, then the new user input if any."""
+	# A blank or whitespace-only system prompt is replaced by the module default.
+	custom_prompt = system_prompt.strip() if system_prompt else ""
+	conversation = [{"role": "system", "content": custom_prompt or SYSTEM_PROMPT_DEFAULT}]
+	for user_text, bot_text in chat_history:
+		turn = (("user", user_text), ("assistant", bot_text))
+		# Empty or None sides of a turn are skipped.
+		conversation.extend({"role": role, "content": text} for role, text in turn if text)
+	if user_input:
+		conversation.append({"role": "user", "content": user_input})
+	return conversation
+def trim_history(chat_history, max_turns=4):
+	"""Return the last *max_turns* entries of *chat_history* (all of them if it is shorter); a zero or negative *max_turns* yields an empty list."""
+	# Guard needed because chat_history[-0:] is the whole list, not an empty one,
+	# and a negative max_turns would slice from the front instead.
+	if max_turns <= 0:
+		return []
+	return chat_history[-max_turns:]
+def to_chatbot_messages(chat_history):
+	"""Convert (user, assistant) history pairs into a flat list of role/content dicts, omitting empty or None sides."""
+	return [
+		{"role": speaker, "content": text}
+		for user_text, bot_text in chat_history
+		for speaker, text in (("user", user_text), ("assistant", bot_text))
+		if text
+	]
+def respond(user_input, chat_history, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt):
+	"""Handle one chat submission and return (textbox value, chat state, chatbot messages, latency text).
+	The stored history is cut to its last 4 turns, the endpoint is called with that history
+	plus the new input, and the resulting (user, reply) pair is appended to the state.
+	Empty or whitespace-only input makes no endpoint call and leaves the history unchanged.
+	"""
+	# Skip blank submissions (including whitespace-only) instead of spending an endpoint call on them.
+	if not user_input or not user_input.strip():
+		return gr.update(value=""), chat_history, to_chatbot_messages(chat_history), gr.update(value="")
+	chat_history = trim_history(chat_history, max_turns=4)
+	# Slider values are coerced to the numeric types placed in the request parameters.
+	params = {
+		"max_new_tokens": int(max_new_tokens),
+		"temperature": float(temperature),
+		"top_p": float(top_p),
+		"max_input_tokens": int(max_input_tokens),
+	}
+	messages = build_messages(chat_history, user_input, system_prompt)
+	# Append the pending turn to a new list (the incoming state list is not mutated).
+	# Nothing is rendered until this function returns, since it is not a generator.
+	chat_history = chat_history + [(user_input, None)]
+	reply, latency = call_endpoint(messages, params)
+	chat_history[-1] = (user_input, reply)
+	# Clear input, update state, update chatbot (messages format), update latency
+	return "", chat_history, to_chatbot_messages(chat_history), f"{latency:.2f}s"
+def new_chat():
+	"""Return the reset values for a fresh conversation: empty chat state, empty chatbot messages, blank latency text."""
+	cleared_state = []
+	cleared_chatbot = []
+	blank_latency = ""
+	return cleared_state, cleared_chatbot, blank_latency
+# Stylesheet passed to gr.Blocks below.
+custom_css = """
+#app {max-width: 980px; margin: 0 auto;}
+footer {visibility: hidden;}
+.gradio-container {font-size: 14px;}
+#controls .label-wrap {min-width: 160px;}
+"""
+# Build the UI; components are laid out in the order they are created inside this context.
+with gr.Blocks(title="Qwen2.5‑Coder‑7B‑Instruct‑Omni1.1 (Isaac Sim Robotics Assistant)", css=custom_css) as demo:
+	gr.Markdown("### Qwen2.5‑Coder‑7B‑Instruct‑Omni1.1\nChat with your Isaac Sim 5.0 robotics development assistant. This Space calls the TomBombadyl/Qwen2.5-Coder-7B-Instruct-Omni1.1 Inference Endpoint powered by NVIDIA L4 GPU.")
+	# Chat at the top (messages format to avoid deprecation)
+	chatbot = gr.Chatbot(height=520, show_copy_button=True, type="messages")
+	# Input row: prompt textbox plus Send / New chat buttons
+	with gr.Row():
+		user_input = gr.Textbox(placeholder="Ask about Isaac Sim robotics, computer vision, sensors, simulation...", lines=2, scale=8)
+		send_btn = gr.Button("Send", variant="primary", scale=1)
+		new_btn = gr.Button("New chat", scale=1)
+	# Utility row: shows the latency text returned by respond for the last request
+	with gr.Row():
+		latency_lbl = gr.Label(value="", label="Latency")
+	# Advanced settings (collapsed): generation parameters and the system prompt
+	with gr.Accordion("Advanced settings", open=False):
+		with gr.Row(elem_id="controls"):
+			temperature = gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="temperature")
+			top_p = gr.Slider(0.1, 1.0, value=0.7, step=0.01, label="top_p")
+			max_new_tokens = gr.Slider(16, 1024, value=DEFAULT_MAX_NEW_TOKENS, step=128, label="max_new_tokens")
+			max_input_tokens = gr.Slider(256, 8192, value=DEFAULT_MAX_INPUT_TOKENS, step=256, label="max_input_tokens")
+		system_prompt = gr.Textbox(
+			value=SYSTEM_PROMPT_DEFAULT,
+			label="System prompt",
+			lines=3,
+			placeholder="Optional system instruction for the assistant",
+		)
+	chat_state = gr.State([])  # still store as list of (user, assistant) tuples
+	# The Send button and the textbox submit event share the same handler and wiring.
+	# The chatbot is one of the outputs, so it is refreshed from the handler's return value.
+	send_btn.click(
+		fn=respond,
+		inputs=[user_input, chat_state, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt],
+		outputs=[user_input, chat_state, chatbot, latency_lbl],
+	)
+	user_input.submit(
+		fn=respond,
+		inputs=[user_input, chat_state, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt],
+		outputs=[user_input, chat_state, chatbot, latency_lbl],
+	)
+	# New chat resets state, chatbot and the latency readout
+	new_btn.click(fn=new_chat, outputs=[chat_state, chatbot, latency_lbl])
+# Enable queuing with defaults (avoid unsupported keyword args on older Gradio)
+demo.queue()
+# Launch the server only when this file is run as a script.
+if __name__ == "__main__":
+	demo.launch()