Update app.py
app.py
CHANGED
@@ -1,18 +1,24 @@
 import os
 import gradio as gr
 import openai as o
-

 # 📜 CONFIG
-UI_TITLE = "✨🧙♂️🔮"
 KEY_FILE = "key.txt"
 MODELS = {
     "GPT-4o ✨": "gpt-4o",
-    "
-    "
-    "GPT-4.5 🔬": "gpt-4-turbo", # Placeholder, gpt-4.5 is not a public model name
-    "GPT-4.1 💻": "gpt-4-turbo", # Placeholder, gpt-4.1 is not a public model name
-    "GPT-4.1-Mini ⚡": "gpt-4-turbo", # Placeholder, gpt-4.1-mini is not a public model name
 }

 # 🎨 STYLE
@@ -20,133 +26,219 @@ H1 = "# <font size='7'>{0}</font>"
 H2 = "## <font size='6'>{0}</font>"
 BTN_STYLE = "<font size='5'>{0}</font>"

-# 🪄 HELPERS
 def save_key(k: str) -> str:
-    "💾🔑"
     if not k or not k.strip(): return "🚫 Empty Key"
     with open(KEY_FILE, "w") as f: f.write(k.strip())
-    return "🔑✅"

 def get_key(k: str) -> str:
-    "📜🔑"
     k = k.strip() if k and k.strip() else (open(KEY_FILE).read().strip() if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""))
-    if not k: raise gr.Error("❗🔑
     o.api_key = k
     return k

-def
     """
-    A pact with
-    To seek its counsel, one must present a worthy key (scribe_key),
-    a quest (quest), and the Oracle's own name (model_name)
-    upon the ancient scroll (scroll) of dialogue.
     """
-    get_key(scribe_key)
-
-
-
-        {"role": "user", "content": user_words}
-        for user_words, _ in scroll
-    ] + [
-        {"role": "assistant", "content": oracle_words}
-        for _, oracle_words in scroll
-    ]
-    celestial_scroll.append({"role": "user", "content": quest})
-
-    # The Oracle whispers its response from the aether.
     try:
-        prophecy = o.chat.completions.create(model=model_name, messages=

-        # The new wisdom is recorded as it is spoken.
-        scroll.append((quest, ""))
         for chunk in prophecy:
             if chunk.choices[0].delta.content:
-
-                yield
     except Exception as e:
-
-        yield


-def
-    ""
-
-
-
-
-
-
-
-
-

 # 🔮 UI
 with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
     gr.Markdown(H1.format(UI_TITLE))

-
-    with gr.Accordion("🔑 Eldritch Key", open=False):
         with gr.Row():
-            api_key_box = gr.Textbox(
-                label="🔑",
-                type="password",
-                placeholder="sk-...",
-                value=get_key("") if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""),
-                scale=3
-            )
             save_btn = gr.Button("💾", scale=1)
             status_txt = gr.Textbox(interactive=False, scale=1, label="Status")
-
-
-
-    gr.
-
-
-
-
-
-
-
-
-        is_visible = display_name in model_selector.value
-        with gr.Blocks(visible=is_visible) as model_block:
-            gr.Markdown(f"### <font size='5'>{display_name}</font>")
-            chatbot = gr.Chatbot(height=350, label=f"Scroll of {display_name}")
-            with gr.Row():
-                run_btn = gr.Button(value=BTN_STYLE.format("▶️ Run"), variant="primary", scale=1)
-                stop_btn = gr.Button(value=BTN_STYLE.format("⏹️ Stop"), variant="stop", scale=1)
-
-            # Each run button triggers its own oracle
-            # The `_js` param is a trick to pass the model's API name to the Python function
-            run_event = run_btn.click(
-                fn=summon_oracle,
-                inputs=[gr.State(api_name), api_key_box, chatbot.i_am_a_dummy_component_for_the_event_to_work, chatbot],
-                outputs=[chatbot]
             )
-            stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[run_event])
-        model_blocks.append(model_block)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

 if __name__ == "__main__":
-
-    gr.Chatbot.i_am_a_dummy_component_for_the_event_to_work = gr.Textbox(visible=False)
-    demo.launch(share=True)
 import os
 import gradio as gr
 import openai as o
+import base64
+import fitz # PyMuPDF
+import cv2
+from moviepy.video.io.VideoFileClip import VideoFileClip
+import json
+import requests
+import re
+from io import BytesIO
+from PIL import Image

 # 📜 CONFIG
+UI_TITLE = "✨🧙♂️🔮 GPT-4o Omni-Oracle"
 KEY_FILE = "key.txt"
+STATE_FILE = "app_state.json"
 MODELS = {
     "GPT-4o ✨": "gpt-4o",
+    "GPT-4 Turbo 🚀": "gpt-4-turbo",
+    "GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
 }

 # 🎨 STYLE
 H1 = "# <font size='7'>{0}</font>"
 H2 = "## <font size='6'>{0}</font>"
 BTN_STYLE = "<font size='5'>{0}</font>"

+# 🪄 HELPERS, LORE & AUTOSAVE RITUALS
+def save_state(data: dict):
+    """A rune that inscribes the session's memory onto a JSON scroll."""
+    with open(STATE_FILE, 'w') as f:
+        json.dump(data, f, indent=4)
+
+def load_state() -> dict:
+    """A ritual to recall the session's memory from the JSON scroll."""
+    if os.path.exists(STATE_FILE):
+        with open(STATE_FILE, 'r') as f:
+            try:
+                return json.load(f)
+            except json.JSONDecodeError:
+                return {} # Return empty if scroll is corrupted
+    return {}
+
+def update_and_save(key: str, value, state: dict):
+    """A binding spell that updates a memory and immediately inscribes it."""
+    state[key] = value
+    save_state(state)
+    return state
+
 def save_key(k: str) -> str:
+    "💾🔑 A rune to bind the Eldritch Key to the physical realm (disk)."
     if not k or not k.strip(): return "🚫 Empty Key"
     with open(KEY_FILE, "w") as f: f.write(k.strip())
+    return "🔑✅ Key Saved!"

 def get_key(k: str) -> str:
+    "📜🔑 A ritual to summon the Eldritch Key, prioritizing the user's offering, then the bound key, then one from the environment."
     k = k.strip() if k and k.strip() else (open(KEY_FILE).read().strip() if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""))
+    if not k: raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required to commune with the Oracles.")
     o.api_key = k
     return k

+def file_to_base64(file_path):
+    """Encodes a file into a base64 string for embedding in API calls."""
+    with open(file_path, "rb") as f:
+        return base64.b64encode(f.read()).decode('utf-8')
+
+def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
     """
+    A universal pact with any Oracle. It can perceive text, images, and the echoes of past conversations.
     """
+    get_key(scribe_key)
+
+    messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
+
     try:
+        prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
+
+        history.append({"role": "user", "content": "..."})
+        history.append({"role": "assistant", "content": ""})

         for chunk in prophecy:
             if chunk.choices[0].delta.content:
+                history[-1]['content'] += chunk.choices[0].delta.content
+                yield history
     except Exception as e:
+        error_message = f"🧙♂️🔮 A magical disturbance occurred: {str(e)}"
+        yield history + [{"role": "assistant", "content": error_message}]

+# --- Modality-Specific Summoning Rituals ---

+def summon_vision_from_image(api_key, model, prompt, image_path, history):
+    "A ritual to grant sight to the Oracle, allowing it to perceive an image."
+    if image_path is None:
+        raise gr.Error("An image must be provided to summon vision.")
+
+    b64_image = file_to_base64(image_path.name)
+    user_content = [
+        {"type": "text", "text": prompt},
+        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
+    ]
+    system_prompt = "You are a helpful assistant that analyzes images. Respond in Markdown."
+    yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
+
+def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
+    "A rite to translate spoken words from an audio file into text, then seek the Oracle's wisdom upon it."
+    if audio_path is None:
+        raise gr.Error("An audio file must be provided to summon its echo.")
+    get_key(api_key)
+    with open(audio_path.name, "rb") as audio_file:
+        transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
+
+    full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
+    user_content = [{"type": "text", "text": full_prompt}]
+    system_prompt = "You are a helpful assistant analyzing an audio transcript. Summarize it and answer questions. Respond in Markdown."
+    yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
+
+def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
+    "Extracts the written word from PDF or text files to present to the Oracle."
+    if file_path is None: raise gr.Error("A file must be provided.")
+    text_content = ""
+    if file_path.name.lower().endswith('.pdf'):
+        with fitz.open(file_path.name) as doc:
+            text_content = "".join(page.get_text() for page in doc)
+    else:
+        with open(file_path.name, 'r', encoding='utf-8') as f:
+            text_content = f.read()
+
+    full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
+    user_content = [{"type": "text", "text": full_prompt}]
+    system_prompt = "You are a helpful assistant analyzing a document. Summarize it and answer questions. Respond in Markdown."
+    yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
+
+def summon_chronicle_from_video(api_key, model, prompt, video_path, history, progress=gr.Progress()):
+    "A grand ritual to divine meaning from a video's moving pictures and spoken words."
+    if video_path is None: raise gr.Error("A video must be provided.")
+    get_key(api_key)
+    base_video_path, _ = os.path.splitext(video_path.name)
+
+    progress(0.1, desc="🔮 Extracting Audio...")
+    audio_path = f"{base_video_path}.mp3"
+    transcript_text = "No audio found."
+    try:
+        with VideoFileClip(video_path.name) as clip:
+            clip.audio.write_audiofile(audio_path, bitrate="32k", logger=None)
+        progress(0.3, desc="🎤 Transcribing Audio...")
+        with open(audio_path, "rb") as audio_file:
+            transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
+    except Exception as e:
+        print(f"Audio failed: {e}")
+
+    progress(0.6, desc="🖼️ Sampling Frames...")
+    base64Frames = []
+    video = cv2.VideoCapture(video_path.name)
+    total_frames, fps = int(video.get(cv2.CAP_PROP_FRAME_COUNT)), video.get(cv2.CAP_PROP_FPS)
+    frames_to_skip = int(fps * 2)
+    for curr_frame in range(0, total_frames - 1, frames_to_skip):
+        if len(base64Frames) >= 10: break
+        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+        success, frame = video.read()
+        if not success: break
+        _, buffer = cv2.imencode(".jpg", frame)
+        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+    video.release()
+
+    progress(0.8, desc="🌀 Consulting Oracle...")
+    user_content = [
+        {"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"},
+        *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
+    ]
+    system_prompt = "You are a helpful video analyst. Use the frames and transcript to summarize and answer questions. Respond in Markdown."
+    yield from invoke_oracle(api_key, model, system_prompt, user_content, history)

 # 🔮 UI
 with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
+    # --- Load State & Create State Holder ---
+    initial_state = load_state()
+    app_state = gr.State(initial_state)
+
     gr.Markdown(H1.format(UI_TITLE))

+    with gr.Accordion("🔑 Eldritch Key & Oracle Selection", open=True):
         with gr.Row():
+            api_key_box = gr.Textbox(label="🔑 Key", type="password", placeholder="sk-...", scale=3, value=initial_state.get('api_key', ''))
             save_btn = gr.Button("💾", scale=1)
             status_txt = gr.Textbox(interactive=False, scale=1, label="Status")
+        model_selector = gr.Dropdown(choices=list(MODELS.keys()), label="🔮 Oracle", value=initial_state.get('model', "GPT-4o ✨"))
+    save_btn.click(save_key, inputs=api_key_box, outputs=status_txt)
+
+    chatbot = gr.Chatbot(height=500, label="📜 Scroll of Conversation", type='messages', value=initial_state.get('chatbot', []))
+
+    with gr.Tabs():
+        with gr.TabItem("💬 Chat"):
+            text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
+            text_event = text_prompt.submit(
+                fn=lambda api_key, model, prompt, hist: invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], hist),
+                inputs=[api_key_box, model_selector, text_prompt, chatbot],
+                outputs=chatbot
             )

+        with gr.TabItem("🖼️ Image"):
+            with gr.Row():
+                image_input = gr.File(label="Upload Image", type="file")
+                image_output = gr.Image(label="Your Image", type="filepath", interactive=False)
+            image_prompt = gr.Textbox(label="Image Prompt:", value=initial_state.get('image_prompt', "What is in this image?"))
+            image_btn = gr.Button("👁️ Summon Vision")
+            image_input.change(lambda x: x, inputs=image_input, outputs=image_output)
+            image_event = image_btn.click(summon_vision_from_image, [api_key_box, model_selector, image_prompt, image_input, chatbot], chatbot)
+
+        with gr.TabItem("🎤 Audio"):
+            audio_input = gr.File(label="Upload Audio", type="file")
+            audio_prompt = gr.Textbox(label="Audio Prompt:", value=initial_state.get('audio_prompt', "Summarize this audio."))
+            audio_btn = gr.Button("🗣️ Summon Echo")
+            audio_event = audio_btn.click(summon_echo_from_audio, [api_key_box, model_selector, audio_prompt, audio_input, chatbot], chatbot)
+
+        with gr.TabItem("🎥 Video"):
+            video_input = gr.File(label="Upload Video", type="file")
+            video_prompt = gr.Textbox(label="Video Prompt:", value=initial_state.get('video_prompt', "Summarize this video."))
+            video_btn = gr.Button("🎬 Summon Chronicle")
+            video_event = video_btn.click(summon_chronicle_from_video, [api_key_box, model_selector, video_prompt, video_input, chatbot], chatbot)
+
+        with gr.TabItem("📄 Document"):
+            doc_input = gr.File(label="Upload PDF or TXT", type="file")
+            doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
+            doc_btn = gr.Button("📖 Summon Wisdom")
+            doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)
+
+    # --- Autosave Event Listeners ---
+    # Simple text/dropdown inputs
+    api_key_box.change(update_and_save, [gr.State('api_key'), api_key_box, app_state], app_state)
+    model_selector.change(update_and_save, [gr.State('model'), model_selector, app_state], app_state)
+    text_prompt.change(update_and_save, [gr.State('text_prompt'), text_prompt, app_state], app_state)
+    image_prompt.change(update_and_save, [gr.State('image_prompt'), image_prompt, app_state], app_state)
+    audio_prompt.change(update_and_save, [gr.State('audio_prompt'), audio_prompt, app_state], app_state)
+    video_prompt.change(update_and_save, [gr.State('video_prompt'), video_prompt, app_state], app_state)
+    doc_prompt.change(update_and_save, [gr.State('doc_prompt'), doc_prompt, app_state], app_state)
+
+    # Chatbot history, saved after each interaction
+    for event in [text_event, image_event, audio_event, video_event, doc_event]:
+        event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)

 if __name__ == "__main__":
+    demo.launch(share=True, debug=True)
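
A note on the autosave helpers added in this commit: save_state, load_state, and update_and_save simply round-trip one dict through app_state.json, and every .change() and .then() listener in the UI funnels into them. Below is a minimal stand-alone sketch of that behaviour; the helper bodies mirror the commit, while the sample keys and values are purely illustrative.

import json
import os

STATE_FILE = "app_state.json"

def save_state(data: dict):
    # Write the whole session dict to disk as pretty-printed JSON.
    with open(STATE_FILE, "w") as f:
        json.dump(data, f, indent=4)

def load_state() -> dict:
    # A missing or corrupted file simply yields an empty session.
    if os.path.exists(STATE_FILE):
        with open(STATE_FILE) as f:
            try:
                return json.load(f)
            except json.JSONDecodeError:
                return {}
    return {}

def update_and_save(key, value, state: dict) -> dict:
    # Update one key and immediately persist the full state.
    state[key] = value
    save_state(state)
    return state

state = load_state()                                   # {} on a fresh Space
state = update_and_save("model", "GPT-4o ✨", state)
state = update_and_save("text_prompt", "Hello", state)
print(load_state())                                    # both keys survive a restart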
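
For clarity on what invoke_oracle actually sends, here is a sketch of the messages list that summon_vision_from_image assembles before the streaming call to o.chat.completions.create. No API call is made, and the image bytes are a stand-in rather than anything from the original app.

import base64
import json

# Stand-in bytes; in the app, file_to_base64 reads the uploaded image file.
b64_image = base64.b64encode(b"not really a PNG, just illustrative bytes").decode("utf-8")

history = []  # prior turns, already in OpenAI chat-message format
system_prompt = "You are a helpful assistant that analyzes images. Respond in Markdown."
user_content = [
    {"type": "text", "text": "What is in this image?"},
    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}},
]

# invoke_oracle concatenates history + system + user and streams the completion.
messages = history + [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_content},
]
print(json.dumps(messages, indent=2))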
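
The frame-sampling arithmetic in summon_chronicle_from_video is easy to check by hand: frames_to_skip = int(fps * 2) means roughly one frame every two seconds, and the len(...) >= 10 guard caps each request at ten frames. A worked example with made-up numbers (a 120-second clip at 30 fps):

fps = 30.0
total_frames = 3600                # 120 s * 30 fps (illustrative values)
frames_to_skip = int(fps * 2)      # 60 -> sample one frame every ~2 seconds

sampled = []
for curr_frame in range(0, total_frames - 1, frames_to_skip):
    if len(sampled) >= 10:         # same cap as in the commit
        break
    sampled.append(curr_frame)

print(sampled)   # [0, 60, 120, ..., 540] -> only the first ~18 seconds are sent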