TomBombadyl committed
Commit 2175d14 · verified · Parent: 3339ba0

Delete app.py

Files changed (1): app.py +0 -193
app.py DELETED
@@ -1,193 +0,0 @@
-import os
-import json
-import time
-import requests
-import gradio as gr
-
-# Read secrets and sanitize URL
-ENDPOINT_URL = (os.environ.get("ENDPOINT_URL") or "https://erxvjreo1onxvdf7.us-east4.gcp.endpoints.huggingface.cloud").strip().rstrip("/")
-HF_TOKEN = (os.environ.get("HF_TOKEN") or "").strip()
-
-# Debug logging
-print(f"🚀 DEBUG: ENDPOINT_URL set to: {ENDPOINT_URL}")
-print(f"🚀 DEBUG: HF_TOKEN present: {'Yes' if HF_TOKEN else 'No'}")
-
-if not ENDPOINT_URL:
-    raise RuntimeError("Missing ENDPOINT_URL Space secret")
-
-HEADERS = {
-    "Content-Type": "application/json",
-    "Accept": "application/json",
-}
-if HF_TOKEN:
-    HEADERS["Authorization"] = f"Bearer {HF_TOKEN}"
-
-SYSTEM_PROMPT_DEFAULT = (
-    "You are a helpful AI assistant for Isaac Sim 5.0, Isaac Lab 2.1, and Omniverse Kit 107.3 robotics development. "
-    "You specialize in NVIDIA robotics development, computer vision, sensor integration, and simulation workflows. "
-    "Provide practical, code-focused guidance with complete examples and best practices."
-)
-
-DEFAULT_MAX_NEW_TOKENS = 1024
-DEFAULT_MAX_INPUT_TOKENS = 2048
-
-def to_single_turn(messages):
-    lines = []
-    for m in messages:
-        role = m.get("role", "user").capitalize()
-        lines.append(f"{role}: {m.get('content','')}")
-    lines.append("Assistant:")
-    return "\n".join(lines)
-
-def call_endpoint(messages, parameters):
-    start = time.time()
-
-    # Debug logging
-    print(f"🔍 DEBUG: Calling endpoint: {ENDPOINT_URL}")
-    print(f"🔍 DEBUG: Headers: {HEADERS}")
-
-    # Prefer single-turn first (matches your handler expectations)
-    payload_inputs = {"inputs": to_single_turn(messages), "parameters": parameters}
-    print(f"🔍 DEBUG: Payload: {payload_inputs}")
-
-    resp = requests.post(ENDPOINT_URL, headers=HEADERS, json=payload_inputs, timeout=120)
-    latency = time.time() - start
-
-    print(f"🔍 DEBUG: Response status: {resp.status_code}")
-    print(f"🔍 DEBUG: Response body: {resp.text}")
-
-    if resp.status_code == 200:
-        data = resp.json()
-        text = data.get("generated_text") if isinstance(data, dict) else str(data)
-        return text or "", latency
-
-    # Fallback to messages for servers that support chat
-    print(f"🔍 DEBUG: First attempt failed, trying messages format...")
-    resp2 = requests.post(
-        ENDPOINT_URL,
-        headers=HEADERS,
-        json={"messages": messages, "parameters": parameters},
-        timeout=120,
-    )
-    latency = time.time() - start
-    print(f"🔍 DEBUG: Fallback response status: {resp2.status_code}")
-    print(f"🔍 DEBUG: Fallback response body: {resp2.text}")
-
-    if resp2.status_code == 200:
-        data = resp2.json()
-        text = data.get("generated_text") if isinstance(data, dict) else str(data)
-        return text or "", latency
-
-    return f"HTTP {resp.status_code}/{resp2.status_code}: {resp.text or resp2.text}", latency
-
-def build_messages(chat_history, user_input, system_prompt):
-    messages = []
-    if system_prompt and system_prompt.strip():
-        messages.append({"role": "system", "content": system_prompt.strip()})
-    else:
-        messages.append({"role": "system", "content": SYSTEM_PROMPT_DEFAULT})
-    for u, b in chat_history:
-        if u:
-            messages.append({"role": "user", "content": u})
-        if b:
-            messages.append({"role": "assistant", "content": b})
-    if user_input:
-        messages.append({"role": "user", "content": user_input})
-    return messages
-
-def trim_history(chat_history, max_turns=4):
-    return chat_history[-max_turns:]
-
-def to_chatbot_messages(chat_history):
-    msgs = []
-    for u, a in chat_history:
-        if u:
-            msgs.append({"role": "user", "content": u})
-        if a:
-            msgs.append({"role": "assistant", "content": a})
-    return msgs
-
-def respond(user_input, chat_history, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt):
-    if not user_input:
-        return gr.update(value=""), chat_history, to_chatbot_messages(chat_history), gr.update(value="")
-
-    chat_history = trim_history(chat_history, max_turns=4)
-
-    params = {
-        "max_new_tokens": int(max_new_tokens),
-        "temperature": float(temperature),
-        "top_p": float(top_p),
-        "max_input_tokens": int(max_input_tokens),
-    }
-    messages = build_messages(chat_history, user_input, system_prompt)
-
-    # Show the user message immediately
-    chat_history = chat_history + [(user_input, None)]
-    reply, latency = call_endpoint(messages, params)
-    chat_history[-1] = (user_input, reply)
-
-    # Clear input, update state, update chatbot (messages format), update latency
-    return "", chat_history, to_chatbot_messages(chat_history), f"{latency:.2f}s"
-
-def new_chat():
-    return [], [], ""
-
-custom_css = """
-#app {max-width: 980px; margin: 0 auto;}
-footer {visibility: hidden;}
-.gradio-container {font-size: 14px;}
-#controls .label-wrap {min-width: 160px;}
-"""
-
-with gr.Blocks(title="Qwen2.5‑Coder‑7B‑Instruct‑Omni1.1 (Isaac Sim Robotics Assistant)", css=custom_css) as demo:
-    gr.Markdown("### Qwen2.5‑Coder‑7B‑Instruct‑Omni1.1\nChat with your Isaac Sim 5.0 robotics development assistant. This Space calls the TomBombadyl/Qwen2.5-Coder-7B-Instruct-Omni1.1 Inference Endpoint powered by NVIDIA L4 GPU.")
-
-    # Chat at the top (messages format to avoid deprecation)
-    chatbot = gr.Chatbot(height=520, show_copy_button=True, type="messages")
-
-    # Input row
-    with gr.Row():
-        user_input = gr.Textbox(placeholder="Ask about Isaac Sim robotics, computer vision, sensors, simulation...", lines=2, scale=8)
-        send_btn = gr.Button("Send", variant="primary", scale=1)
-        new_btn = gr.Button("New chat", scale=1)
-
-    # Right-aligned utility row
-    with gr.Row():
-        latency_lbl = gr.Label(value="", label="Latency")
-
-    # Advanced settings (collapsed)
-    with gr.Accordion("Advanced settings", open=False):
-        with gr.Row(elem_id="controls"):
-            temperature = gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="temperature")
-            top_p = gr.Slider(0.1, 1.0, value=0.7, step=0.01, label="top_p")
-            max_new_tokens = gr.Slider(16, 1024, value=DEFAULT_MAX_NEW_TOKENS, step=128, label="max_new_tokens")
-            max_input_tokens = gr.Slider(256, 8192, value=DEFAULT_MAX_INPUT_TOKENS, step=256, label="max_input_tokens")
-        system_prompt = gr.Textbox(
-            value=SYSTEM_PROMPT_DEFAULT,
-            label="System prompt",
-            lines=3,
-            placeholder="Optional system instruction for the assistant",
-        )
-
-    chat_state = gr.State([])  # still store as list of (user, assistant) tuples
-
-    # Return chatbot directly so responses render immediately
-    send_btn.click(
-        fn=respond,
-        inputs=[user_input, chat_state, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt],
-        outputs=[user_input, chat_state, chatbot, latency_lbl],
-    )
-    user_input.submit(
-        fn=respond,
-        inputs=[user_input, chat_state, temperature, top_p, max_new_tokens, max_input_tokens, system_prompt],
-        outputs=[user_input, chat_state, chatbot, latency_lbl],
-    )
-
-    # New chat resets state and chatbot
-    new_btn.click(fn=new_chat, outputs=[chat_state, chatbot, latency_lbl])
-
-# Enable queuing with defaults (avoid unsupported keyword args on older Gradio)
-demo.queue()
-
-if __name__ == "__main__":
-    demo.launch()
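
Note for anyone who was using this Space: the deleted UI wrapped a single HTTP request, which can still be reproduced directly. Below is a minimal sketch, assuming the Inference Endpoint URL from the deleted code is still live and still accepts the same {"inputs": ..., "parameters": ...} payload its handler expected. The list-shaped response branch is an assumption: the deleted app only handled a dict response, while Hugging Face endpoints commonly return [{"generated_text": ...}].

import os

import requests

# Endpoint and headers as in the deleted app.py; the URL is assumed unchanged.
ENDPOINT_URL = "https://erxvjreo1onxvdf7.us-east4.gcp.endpoints.huggingface.cloud"
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

headers = {"Content-Type": "application/json", "Accept": "application/json"}
if HF_TOKEN:
    headers["Authorization"] = f"Bearer {HF_TOKEN}"

payload = {
    # Single-turn prompt in the format produced by to_single_turn() above
    "inputs": "System: You are an Isaac Sim 5.0 robotics assistant.\nUser: How do I attach a camera sensor to a robot?\nAssistant:",
    "parameters": {"max_new_tokens": 256, "temperature": 0.2, "top_p": 0.7},
}

resp = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()

# Handle both response shapes (assumption: the endpoint may return either a
# dict or a list of dicts; the deleted app only covered the dict case).
if isinstance(data, list) and data and isinstance(data[0], dict):
    print(data[0].get("generated_text", ""))
elif isinstance(data, dict):
    print(data.get("generated_text", ""))
else:
    print(data)

If the endpoint itself has also been retired, this request will simply fail (e.g. with a 404), and there is nothing to recover client-side.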