TomBombadyl committed
Commit 5276ce7 · verified · 1 Parent(s): 2175d14

Create app.py

Files changed (1)
  1. app.py +193 -0
app.py ADDED
@@ -0,0 +1,193 @@
import os
import time
import requests
import gradio as gr

# Read secrets and sanitize the endpoint URL
ENDPOINT_URL = (os.environ.get("ENDPOINT_URL") or "https://erxvjreo1onxvdf7.us-east4.gcp.endpoints.huggingface.cloud").strip().rstrip("/")
HF_TOKEN = (os.environ.get("HF_TOKEN") or "").strip()

# Debug logging
print(f"🚀 DEBUG: ENDPOINT_URL set to: {ENDPOINT_URL}")
print(f"🚀 DEBUG: HF_TOKEN present: {'Yes' if HF_TOKEN else 'No'}")

if not ENDPOINT_URL:
    raise RuntimeError("Missing ENDPOINT_URL Space secret")

HEADERS = {
    "Content-Type": "application/json",
    "Accept": "application/json",
}
if HF_TOKEN:
    HEADERS["Authorization"] = f"Bearer {HF_TOKEN}"

SYSTEM_PROMPT_DEFAULT = (
    "You are a helpful AI assistant for Isaac Sim 5.0, Isaac Lab 2.1, and Omniverse Kit 107.3 robotics development. "
    "You specialize in NVIDIA robotics development, computer vision, sensor integration, and simulation workflows. "
    "Provide practical, code-focused guidance with complete examples and best practices."
)

DEFAULT_MAX_NEW_TOKENS = 1024
DEFAULT_MAX_INPUT_TOKENS = 2048

def to_single_turn(messages):
    # Flatten an OpenAI-style message list into one prompt string,
    # ending with "Assistant:" so the model continues from there.
    lines = []
    for m in messages:
        role = m.get("role", "user").capitalize()
        lines.append(f"{role}: {m.get('content', '')}")
    lines.append("Assistant:")
    return "\n".join(lines)

def extract_generated_text(data):
    # Endpoints commonly return {"generated_text": ...} or [{"generated_text": ...}].
    if isinstance(data, list) and data and isinstance(data[0], dict):
        return data[0].get("generated_text")
    if isinstance(data, dict):
        return data.get("generated_text")
    return str(data)

def call_endpoint(messages, parameters):
    start = time.time()

    # Debug logging (redact the bearer token so it never lands in logs)
    safe_headers = {k: ("***" if k == "Authorization" else v) for k, v in HEADERS.items()}
    print(f"🔍 DEBUG: Calling endpoint: {ENDPOINT_URL}")
    print(f"🔍 DEBUG: Headers: {safe_headers}")

    # Prefer single-turn first (matches the custom handler's expectations)
    payload_inputs = {"inputs": to_single_turn(messages), "parameters": parameters}
    print(f"🔍 DEBUG: Payload: {payload_inputs}")

    resp = requests.post(ENDPOINT_URL, headers=HEADERS, json=payload_inputs, timeout=120)
    latency = time.time() - start

    print(f"🔍 DEBUG: Response status: {resp.status_code}")
    print(f"🔍 DEBUG: Response body: {resp.text}")

    if resp.status_code == 200:
        text = extract_generated_text(resp.json())
        return text or "", latency

    # Fall back to a chat-style payload for servers that support "messages"
    print("🔍 DEBUG: First attempt failed, trying messages format...")
    resp2 = requests.post(
        ENDPOINT_URL,
        headers=HEADERS,
        json={"messages": messages, "parameters": parameters},
        timeout=120,
    )
    latency = time.time() - start
    print(f"🔍 DEBUG: Fallback response status: {resp2.status_code}")
    print(f"🔍 DEBUG: Fallback response body: {resp2.text}")

    if resp2.status_code == 200:
        text = extract_generated_text(resp2.json())
        return text or "", latency

    return f"HTTP {resp.status_code}/{resp2.status_code}: {resp.text or resp2.text}", latency

def build_messages(chat_history, user_input, system_prompt):
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    else:
        messages.append({"role": "system", "content": SYSTEM_PROMPT_DEFAULT})
    for u, b in chat_history:
        if u:
            messages.append({"role": "user", "content": u})
        if b:
            messages.append({"role": "assistant", "content": b})
    if user_input:
        messages.append({"role": "user", "content": user_input})
    return messages
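# Illustrative output shape (assumed history and input, not executed):
#   build_messages([("Hi", "Hello!")], "And next?", "")
# -> [{"role": "system", "content": SYSTEM_PROMPT_DEFAULT},
#     {"role": "user", "content": "Hi"},
#     {"role": "assistant", "content": "Hello!"},
#     {"role": "user", "content": "And next?"}]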

def trim_history(chat_history, max_turns=4):
    # Keep only the most recent turns to bound the prompt length
    return chat_history[-max_turns:]

def to_chatbot_messages(chat_history):
    # Convert (user, assistant) tuples into the "messages" format gr.Chatbot expects
    msgs = []
    for u, a in chat_history:
        if u:
            msgs.append({"role": "user", "content": u})
        if a:
            msgs.append({"role": "assistant", "content": a})
    return msgs

def respond(user_input, chat_history, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt):
    if not user_input:
        return gr.update(value=""), chat_history, to_chatbot_messages(chat_history), gr.update(value="")

    chat_history = trim_history(chat_history, max_turns=4)

    params = {
        "max_new_tokens": int(max_new_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
        "max_input_tokens": int(max_input_tokens),
    }
    messages = build_messages(chat_history, user_input, system_prompt)

    # Show the user message immediately
    chat_history = chat_history + [(user_input, None)]
    reply, latency = call_endpoint(messages, params)
    chat_history[-1] = (user_input, reply)

    # Clear input, update state, update chatbot (messages format), update latency
    return "", chat_history, to_chatbot_messages(chat_history), f"{latency:.2f}s"

def new_chat():
    return [], [], ""

custom_css = """
#app {max-width: 980px; margin: 0 auto;}
footer {visibility: hidden;}
.gradio-container {font-size: 14px;}
#controls .label-wrap {min-width: 160px;}
"""

with gr.Blocks(title="Qwen2.5-Coder-7B-Instruct-Omni1.1 (Isaac Sim Robotics Assistant)", css=custom_css) as demo:
    gr.Markdown(
        "### Qwen2.5-Coder-7B-Instruct-Omni1.1\n"
        "Chat with your Isaac Sim 5.0 robotics development assistant. This Space calls the "
        "TomBombadyl/Qwen2.5-Coder-7B-Instruct-Omni1.1 Inference Endpoint powered by an NVIDIA L4 GPU."
    )

    # Chat at the top (messages format to avoid deprecation warnings)
    chatbot = gr.Chatbot(height=520, show_copy_button=True, type="messages")

    # Input row
    with gr.Row():
        user_input = gr.Textbox(placeholder="Ask about Isaac Sim robotics, computer vision, sensors, simulation...", lines=2, scale=8)
        send_btn = gr.Button("Send", variant="primary", scale=1)
        new_btn = gr.Button("New chat", scale=1)

    # Right-aligned utility row
    with gr.Row():
        latency_lbl = gr.Label(value="", label="Latency")

    # Advanced settings (collapsed)
    with gr.Accordion("Advanced settings", open=False):
        with gr.Row(elem_id="controls"):
            temperature = gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.7, step=0.01, label="top_p")
            max_new_tokens = gr.Slider(16, 1024, value=DEFAULT_MAX_NEW_TOKENS, step=128, label="max_new_tokens")
            max_input_tokens = gr.Slider(256, 8192, value=DEFAULT_MAX_INPUT_TOKENS, step=256, label="max_input_tokens")
        system_prompt = gr.Textbox(
            value=SYSTEM_PROMPT_DEFAULT,
            label="System prompt",
            lines=3,
            placeholder="Optional system instruction for the assistant",
        )

    chat_state = gr.State([])  # history is stored as a list of (user, assistant) tuples

    # Return the chatbot directly so responses render immediately
    send_btn.click(
        fn=respond,
        inputs=[user_input, chat_state, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt],
        outputs=[user_input, chat_state, chatbot, latency_lbl],
    )
    user_input.submit(
        fn=respond,
        inputs=[user_input, chat_state, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt],
        outputs=[user_input, chat_state, chatbot, latency_lbl],
    )

    # New chat resets state and chatbot
    new_btn.click(fn=new_chat, outputs=[chat_state, chatbot, latency_lbl])

# Enable queuing with defaults (avoids unsupported keyword args on older Gradio)
demo.queue()

if __name__ == "__main__":
    demo.launch()
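# Quick endpoint smoke test (illustrative sketch, assuming ENDPOINT_URL and
# HF_TOKEN are exported in the shell; run it as a separate script, not as
# part of this app):
#
#   import os, requests
#   r = requests.post(
#       os.environ["ENDPOINT_URL"],
#       headers={
#           "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
#           "Content-Type": "application/json",
#       },
#       json={"inputs": "User: Hello\nAssistant:",
#             "parameters": {"max_new_tokens": 64}},
#       timeout=120,
#   )
#   print(r.status_code, r.text[:300])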