ZENLLC committed on
Commit 4468e2d · verified · 1 Parent(s): 367d17c

Create app.py

Files changed (1)
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
import json

import gradio as gr
import openai
import plotly.graph_objects as go

SYSTEM_PROMPT = """You are a multimodal assistant. Return one of the following response types:
- Plain text (just a natural reply)
- JSON object: {"type":"image","prompt":"<dalle-3 prompt>"}
- JSON object: {"type":"chart","title":"<title>","data":[{"x":[...],"y":[...],"label":"..."}]}
- JSON object: {"type":"table","headers":["A","B"],"rows":[[1,2],[3,4]]}
- JSON object: {"type":"audio","text":"Text to speak"}

Respond in plain text unless image/chart/table/audio is clearly required.
"""

def build_messages(history, user_msg):
    """Convert Gradio's (user, assistant) history pairs into OpenAI chat messages."""
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for u, a in history:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": user_msg})
    return messages

def multimodal_chat(api_key, user_msg, history):
    if not api_key:
        raise gr.Error("🔑 Please provide your OpenAI API key.")
    openai.api_key = api_key
    messages = build_messages(history, user_msg)
    response = openai.chat.completions.create(model="gpt-4o", messages=messages)
    content = response.choices[0].message.content.strip()

    img_url, fig, table_html, audio_path = None, None, None, None
    try:
        parsed = json.loads(content)
        t = parsed.get("type")
        if t == "image":
            # Generate an image with DALL-E 3 and embed it in the chat as Markdown.
            img = openai.images.generate(model="dall-e-3", prompt=parsed["prompt"], size="1024x1024", n=1)
            img_url = img.data[0].url
            history.append([user_msg, f"![generated image]({img_url})"])
        elif t == "chart":
            # Build a Plotly line chart from each series in the payload.
            fig = go.Figure()
            for s in parsed["data"]:
                fig.add_trace(go.Scatter(x=s["x"], y=s["y"], mode="lines+markers", name=s.get("label", "")))
            fig.update_layout(title=parsed["title"])
            history.append([user_msg, parsed["title"]])
        elif t == "table":
            # Render the headers/rows payload as a plain HTML table.
            headers = parsed["headers"]
            rows = parsed["rows"]
            table_html = f"<table><thead><tr>{''.join(f'<th>{h}</th>' for h in headers)}</tr></thead><tbody>"
            table_html += ''.join(f"<tr>{''.join(f'<td>{c}</td>' for c in row)}</tr>" for row in rows)
            table_html += "</tbody></table>"
            history.append([user_msg, "Table generated below"])
        elif t == "audio":
            # Synthesize speech with TTS and save it to a file for the Audio component.
            audio = openai.audio.speech.create(model="tts-1", voice="alloy", input=parsed["text"])
            audio_path = "/tmp/audio.mp3"
            with open(audio_path, "wb") as f:
                f.write(audio.read())
            history.append([user_msg, parsed["text"]])
        else:
            history.append([user_msg, content])
    except Exception:
        # Reply was not a recognized JSON payload (or generation failed): treat it as plain text.
        history.append([user_msg, content])
    return history, img_url, fig, table_html, audio_path

with gr.Blocks(css="style.css") as demo:
    gr.Markdown("🤖 **Multimodal Assistant** – Text, Images, Charts, Tables, Audio", elem_id="zen-header")
    api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    chatbot = gr.Chatbot()
    with gr.Row():
        user_msg = gr.Textbox(label="Your message", scale=4)
        send_btn = gr.Button("Send", variant="primary")
    img_out = gr.Image()
    chart_out = gr.Plot()
    table_out = gr.HTML()
    audio_out = gr.Audio(type="filepath")

    def respond(api_key, user_msg, chat_history):
        chat_history, img_url, fig, table, audio = multimodal_chat(api_key, user_msg, chat_history)
        return chat_history, gr.update(value=img_url), gr.update(value=fig), gr.update(value=table), gr.update(value=audio)

    send_btn.click(respond, [api_key, user_msg, chatbot], [chatbot, img_out, chart_out, table_out, audio_out])
    user_msg.submit(respond, [api_key, user_msg, chatbot], [chatbot, img_out, chart_out, table_out, audio_out])

if __name__ == "__main__":
    demo.queue().launch()
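
For reference, a minimal sketch of how a "chart" payload from SYSTEM_PROMPT maps onto a Plotly figure, using a hypothetical model reply (not part of this commit):

    import json
    import plotly.graph_objects as go

    # Hypothetical assistant reply following the "chart" protocol above.
    reply = '{"type":"chart","title":"Demo","data":[{"x":[1,2,3],"y":[2,4,8],"label":"growth"}]}'
    parsed = json.loads(reply)
    fig = go.Figure()
    for s in parsed["data"]:
        fig.add_trace(go.Scatter(x=s["x"], y=s["y"], mode="lines+markers", name=s.get("label", "")))
    fig.update_layout(title=parsed["title"])
    fig.show()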