Update app.py
app.py
CHANGED
@@ -12,7 +12,8 @@ from io import BytesIO
 from PIL import Image
 from pathlib import Path
 import numpy as np
-from
+from fastrtc.gradio import WebRTC
+import soundfile as sf

 # 📜 CONFIG
 UI_TITLE = "✨🧙♂️🔮 GPT-4o Omni-Oracle"
@@ -26,8 +27,6 @@ MODELS = {
     "GPT-4.5 (Research) 🔬": "gpt-4-turbo-preview", # Placeholder
     "GPT-4.1 (Analysis) 💻": "gpt-4-turbo", # Placeholder
     "GPT-4.1-mini (Everyday) ☕": "gpt-4-turbo", # Placeholder
-    "GPT-4 Turbo 🚀": "gpt-4-turbo",
-    "GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
 }
 VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer"]
 TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"]
@@ -37,7 +36,7 @@ LANGUAGES = {
     "🇮🇱 Hebrew": "Hebrew", "🇮🇳 Hindi": "Hindi", "🇯🇵 Japanese": "Japanese", "🇳🇿 Maori": "Maori",
     "🇷🇺 Russian": "Russian", "🇪🇸 Spanish": "Spanish"
 }
-# For WebRTC - Replace with your own if deploying
+# For WebRTC - Replace with your own if deploying on a cloud provider
 RTC_CONFIGURATION = {
     "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
 }
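The shipped RTC_CONFIGURATION lists only a public STUN server, which is usually enough for local testing. When deploying behind stricter NATs, a TURN relay is typically added alongside it; a minimal sketch, with a purely illustrative TURN host and credentials (not part of this commit):

    RTC_CONFIGURATION = {
        "iceServers": [
            {"urls": ["stun:stun.l.google.com:19302"]},
            # Hypothetical TURN relay; replace host, username and credential with your own.
            {"urls": ["turn:turn.example.com:3478"], "username": "user", "credential": "secret"},
        ]
    }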
@@ -52,12 +51,10 @@ CSS = """

 # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
 def save_state(data: dict):
-    """A rune that inscribes the session's memory onto a JSON scroll."""
     with open(STATE_FILE, 'w') as f:
         json.dump(data, f, indent=4)

 def load_state() -> dict:
-    """A ritual to recall the session's memory from the JSON scroll."""
     if os.path.exists(STATE_FILE):
         with open(STATE_FILE, 'r') as f:
             try:
@@ -67,37 +64,52 @@ def load_state() -> dict:
     return {}

 def update_and_save(key: str, value, state: dict):
-    """A binding spell that updates a memory and immediately inscribes it."""
     state[key] = value
     save_state(state)
     return state

 def save_key(k: str) -> str:
-    "💾🔑 A rune to bind the Eldritch Key."
     if not k or not k.strip(): return "🚫 Empty Key"
     with open(KEY_FILE, "w") as f: f.write(k.strip())
     return "🔑✅ Key Saved!"

 def get_key(k: str) -> str:
-    "📜🔑 A ritual to summon the Eldritch Key."
     k = k.strip() if k and k.strip() else (open(KEY_FILE).read().strip() if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""))
     if not k: raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required.")
     o.api_key = k
     return k

+def invoke_oracle(scribe_key: str, model_key: str, system_prompt: str, user_content: list, history: list):
+    get_key(scribe_key)
+    model_name = MODELS.get(model_key, "gpt-4o")
+    messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
+    try:
+        prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
+        history.append({"role": "user", "content": "..."})
+        history.append({"role": "assistant", "content": ""})
+        for chunk in prophecy:
+            if chunk.choices[0].delta.content:
+                history[-1]['content'] += chunk.choices[0].delta.content
+                yield history
+    except Exception as e:
+        yield history + [{"role": "assistant", "content": f"🧙♂️🔮 A magical disturbance occurred: {str(e)}"}]
+
+def handle_text_submission(api_key, model, prompt, history):
+    """A clear path for text quests to the Oracle."""
+    yield from invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], history)
+
 # --- Image & Audio Streaming Functions ---

 def transform_cv2(frame: np.ndarray, transform: str):
     """Applies a magical filter to a single frame from a webcam stream."""
+    if frame is None: return None
     if transform == "cartoon":
         img_color = cv2.pyrDown(cv2.pyrDown(frame))
         for _ in range(6):
             img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
         img_color = cv2.pyrUp(cv2.pyrUp(img_color))
         img_edges = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
-        img_edges = cv2.adaptiveThreshold(
-            cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C,
-            cv2.THRESH_BINARY, 9, 2)
+        img_edges = cv2.adaptiveThreshold(cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 2)
         img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
         return cv2.bitwise_and(img_color, img_edges)
     elif transform == "edges":
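The new invoke_oracle helper is a generator: on each streamed delta it yields the whole message history, appending plain {"role", "content"} dicts, which assumes the Chatbot it feeds is declared with type="messages" elsewhere in app.py. A minimal consumption sketch, using a placeholder API key and one of the model keys from MODELS (illustrative only, not part of the commit):

    history = []
    for partial in invoke_oracle("sk-placeholder", "GPT-4.1 (Analysis) 💻",
                                 "You are a helpful AI assistant.",
                                 [{"type": "text", "text": "Hello, Oracle"}], history):
        streamed_so_far = partial[-1]["content"]  # assistant text accumulated so far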
@@ -106,34 +118,27 @@ def transform_cv2(frame: np.ndarray, transform: str):
         return np.flipud(frame)
     return frame

-def transcribe_streaming(audio_chunk, history_state):
+def transcribe_streaming(api_key, audio_chunk, history_state):
     """Transcribes a chunk of audio, keeping context from previous chunks."""
     if audio_chunk is None:
-        return history_state,
-
-    # In a real scenario, you would use a streaming-capable ASR model.
-    # Here, we simulate it by transcribing each chunk individually.
-    # This is a placeholder for a more complex implementation.
-    get_key(os.getenv("OPENAI_KEY", "")) # Ensure API key is set
-
-    # Save chunk to a temporary file to use with OpenAI API
-    temp_wav_path = "temp_chunk.wav"
+        return history_state, history_state
+    get_key(api_key)
     sample_rate, data = audio_chunk
-
+    temp_wav_path = f"temp_chunk_{hash(data.tobytes())}.wav"
     sf.write(temp_wav_path, data, sample_rate)
-
     try:
         with open(temp_wav_path, "rb") as audio_file:
             transcript = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
         new_text = transcript.text
     except Exception as e:
         print(f"Transcription error: {e}")
-        new_text = "
-
+        new_text = ""
+    finally:
+        if os.path.exists(temp_wav_path):
+            os.remove(temp_wav_path)
     history_state += new_text + " "
     return history_state, history_state

-# --- Other Functions (TTS, etc.) ---
 def generate_speech(api_key, tts_model, voice, text, language_key, format, progress=gr.Progress()):
     get_key(api_key)
     language = LANGUAGES.get(language_key, "English")
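As the removed comments note, this path still transcribes each chunk independently rather than using a truly streaming ASR model; context is carried only through the accumulated history_state string. A small illustrative call with a synthetic silent chunk, assuming Gradio's streaming Audio component delivers (sample_rate, numpy array) tuples and using a placeholder key (not part of the commit):

    import numpy as np
    silent_chunk = (16000, np.zeros(16000, dtype=np.int16))  # one second of silence
    state = ""                                               # transcript accumulated so far
    state, shown = transcribe_streaming("sk-placeholder", silent_chunk, state)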
@@ -174,8 +179,7 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
     with gr.Tabs():
         with gr.TabItem("💬 Chat"):
             text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
-
-            text_event = text_prompt.submit(fn=lambda k, m, p, h: invoke_oracle(k, m, "You are a helpful AI.", [{"type": "text", "text": p}], h), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
+            text_event = text_prompt.submit(fn=handle_text_submission, inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)

         with gr.TabItem("🖼️ Streaming Image"):
             gr.Markdown(H2.format("Live Image Enchantments"))
@@ -191,16 +195,17 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
             mic_input = gr.Audio(sources="microphone", streaming=True)
             transcript_output = gr.Textbox(label="Transcript", interactive=False)
             transcript_state = gr.State(value="")
-            mic_input.stream(transcribe_streaming, [mic_input, transcript_state], [transcript_state, transcript_output], time_limit=
+            mic_input.stream(transcribe_streaming, [api_key_box, mic_input, transcript_state], [transcript_state, transcript_output], time_limit=30, stream_every=2)

-        with gr.TabItem("👁️ Object Detection"):
-            gr.Markdown(H2.format("Live Scrying
-            gr.HTML("<h3 style='text-align: center'>
+        with gr.TabItem("👁️ Object Detection (WebRTC)"):
+            gr.Markdown(H2.format("Live Scrying Spell"))
+            gr.HTML("<h3 style='text-align: center'>NOTE: This is a UI placeholder. A separate inference server for the YOLO model is required for this to function.</h3>")
             with gr.Column(elem_classes=["my-column"]):
                 with gr.Group(elem_classes=["my-group"]):
                     webrtc_stream = WebRTC(label="Stream", rtc_configuration=RTC_CONFIGURATION)
                     conf_threshold = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, step=0.05, value=0.30)
-                    # Placeholder for the actual stream event handler which would call
+                    # Placeholder for the actual stream event handler which would call a loaded YOLOv10 model
+                    # def detection_placeholder(image, conf): return image
                     # webrtc_stream.stream(fn=detection_placeholder, inputs=[webrtc_stream, conf_threshold], outputs=[webrtc_stream], time_limit=10)

         with gr.TabItem("🔊 Speech Synthesis"):
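The handler referenced by the commented-out webrtc_stream.stream(...) call is only sketched in the comments above. Expanded into a runnable stub (the model object and draw_boxes helper are hypothetical placeholders, not part of this commit; until real YOLOv10 inference is wired in, each frame is passed through unchanged):

    def detection_placeholder(image, conf):
        # detections = model.predict(image, conf)   # hypothetical inference call
        # return draw_boxes(image, detections)      # hypothetical box-drawing helper
        return image                                 # pass-through placeholder

    # webrtc_stream.stream(fn=detection_placeholder,
    #                      inputs=[webrtc_stream, conf_threshold],
    #                      outputs=[webrtc_stream], time_limit=10)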
@@ -227,7 +232,4 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
     text_event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)

 if __name__ == "__main__":
-    # A placeholder function for the YOLOv10 detection since we don't have the model loaded here.
-    def detection_placeholder(image, conf):
-        return image # Just return the image as is.
     demo.launch(share=True, debug=True)