Create app.py
app.py ADDED
@@ -0,0 +1,83 @@
import openai, gradio as gr, json, plotly.graph_objects as go
from pathlib import Path

SYSTEM_PROMPT = """You are a multimodal assistant. Return one of the following response types:
- Plain text (just a natural reply)
- JSON object: {"type":"image","prompt":"<dalle-3 prompt>"}
- JSON object: {"type":"chart","title":"<title>","data":[{"x":[...],"y":[...],"label":"..."}]}
- JSON object: {"type":"table","headers":["A","B"],"rows":[[1,2],[3,4]]}
- JSON object: {"type":"audio","text":"Text to speak"}

Respond in plain text unless image/chart/table/audio is clearly required.
"""
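
# Illustrative example of the protocol above (not part of the original prompt): for a request like
# "plot y = x squared for x = 0..4", the model is expected to reply with one JSON object such as
#   {"type": "chart", "title": "y = x^2", "data": [{"x": [0, 1, 2, 3, 4], "y": [0, 1, 4, 9, 16], "label": "x^2"}]}
# which multimodal_chat() below converts into a Plotly figure.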

def build_messages(history, user_msg):
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for u, a in history:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": user_msg})
    return messages
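
# Example with hypothetical values: for history = [["hi", "hello!"]] and user_msg = "draw a cat",
# build_messages returns
#   [{"role": "system", "content": SYSTEM_PROMPT},
#    {"role": "user", "content": "hi"},
#    {"role": "assistant", "content": "hello!"},
#    {"role": "user", "content": "draw a cat"}]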

def multimodal_chat(api_key, user_msg, history):
    if not api_key:
        raise gr.Error("🔑 Please provide your OpenAI API key.")
    openai.api_key = api_key
    messages = build_messages(history, user_msg)
    response = openai.chat.completions.create(model="gpt-4o", messages=messages)
    content = response.choices[0].message.content.strip()

    img_url, fig, table_html, audio_url = None, None, None, None
    try:
        parsed = json.loads(content)
        t = parsed.get("type")
        if t == "image":
            img = openai.images.generate(model="dall-e-3", prompt=parsed["prompt"], size="1024x1024", n=1)
            img_url = img.data[0].url
            # Embed the image in the chat transcript as Markdown; it is also sent to img_out below.
            history.append([user_msg, f"![generated image]({img_url})"])
        elif t == "chart":
            # Build a line chart from the model-supplied series.
            fig = go.Figure()
            for s in parsed["data"]:
                fig.add_trace(go.Scatter(x=s["x"], y=s["y"], mode="lines+markers", name=s.get("label", "")))
            fig.update_layout(title=parsed["title"])
            history.append([user_msg, parsed["title"]])
        elif t == "table":
            # Render the table as plain HTML for the gr.HTML output.
            headers = parsed["headers"]
            rows = parsed["rows"]
            table_html = f"<table><thead><tr>{''.join(f'<th>{h}</th>' for h in headers)}</tr></thead><tbody>"
            table_html += ''.join(f"<tr>{''.join(f'<td>{c}</td>' for c in row)}</tr>" for row in rows)
            table_html += "</tbody></table>"
            history.append([user_msg, "Table generated below"])
        elif t == "audio":
            # Synthesize speech with OpenAI TTS and hand the saved file path to the gr.Audio output.
            audio = openai.audio.speech.create(model="tts-1", voice="alloy", input=parsed["text"])
            path = "/tmp/audio.mp3"
            with open(path, "wb") as f:
                f.write(audio.read())
            audio_url = path
            history.append([user_msg, parsed["text"]])
        else:
            history.append([user_msg, content])
    except Exception:
        # Not JSON (or malformed JSON): treat the reply as plain text.
        history.append([user_msg, content])
    return history, img_url, fig, table_html, audio_url

with gr.Blocks(css="style.css") as demo:
    gr.Markdown("🤖 **Multimodal Assistant** – Text, Images, Charts, Tables, Audio", elem_id="zen-header")
    api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    chatbot = gr.Chatbot()
    with gr.Row():
        user_msg = gr.Textbox(label="Your message", scale=4)
        send_btn = gr.Button("Send", variant="primary")
    img_out = gr.Image()
    chart_out = gr.Plot()
    table_out = gr.HTML()
    audio_out = gr.Audio(type="filepath")

    def respond(api_key, user_msg, chat_history):
        chat_history, img_url, fig, table, audio = multimodal_chat(api_key, user_msg, chat_history)
        return chat_history, gr.update(value=img_url), gr.update(value=fig), gr.update(value=table), gr.update(value=audio)

    send_btn.click(respond, [api_key, user_msg, chatbot], [chatbot, img_out, chart_out, table_out, audio_out])
    user_msg.submit(respond, [api_key, user_msg, chatbot], [chatbot, img_out, chart_out, table_out, audio_out])

if __name__ == "__main__":
    demo.queue().launch()
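
The file above imports gradio, openai, and plotly; assuming a standard Space layout (nothing beyond app.py is shown in this commit), a minimal requirements.txt covering those imports could look like:

gradio
openai
plotly

The style.css referenced by gr.Blocks(css="style.css") is likewise expected to sit next to app.py.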