awacke1 committed on
Commit 621f3cd · verified · 1 Parent(s): b2df32a

Update app.py

Files changed (1)
  1. app.py +39 -37
app.py CHANGED
@@ -12,7 +12,8 @@ from io import BytesIO
 from PIL import Image
 from pathlib import Path
 import numpy as np
-from gradio_webrtc import WebRTC
+from fastrtc.gradio import WebRTC
+import soundfile as sf
 
 # 📜 CONFIG
 UI_TITLE = "✨🧙‍♂️🔮 GPT-4o Omni-Oracle"
@@ -26,8 +27,6 @@ MODELS = {
     "GPT-4.5 (Research) 🔬": "gpt-4-turbo-preview", # Placeholder
     "GPT-4.1 (Analysis) 💻": "gpt-4-turbo", # Placeholder
     "GPT-4.1-mini (Everyday) ☕": "gpt-4-turbo", # Placeholder
-    "GPT-4 Turbo 🚀": "gpt-4-turbo",
-    "GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
 }
 VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer"]
 TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"]
@@ -37,7 +36,7 @@ LANGUAGES = {
     "🇮🇱 Hebrew": "Hebrew", "🇮🇳 Hindi": "Hindi", "🇯🇵 Japanese": "Japanese", "🇳🇿 Maori": "Maori",
     "🇷🇺 Russian": "Russian", "🇪🇸 Spanish": "Spanish"
 }
-# For WebRTC - Replace with your own if deploying
+# For WebRTC - Replace with your own if deploying on a cloud provider
 RTC_CONFIGURATION = {
     "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
 }
@@ -52,12 +51,10 @@ CSS = """
52
 
53
  # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
54
  def save_state(data: dict):
55
- """A rune that inscribes the session's memory onto a JSON scroll."""
56
  with open(STATE_FILE, 'w') as f:
57
  json.dump(data, f, indent=4)
58
 
59
  def load_state() -> dict:
60
- """A ritual to recall the session's memory from the JSON scroll."""
61
  if os.path.exists(STATE_FILE):
62
  with open(STATE_FILE, 'r') as f:
63
  try:
@@ -67,37 +64,52 @@ def load_state() -> dict:
     return {}
 
 def update_and_save(key: str, value, state: dict):
-    """A binding spell that updates a memory and immediately inscribes it."""
     state[key] = value
     save_state(state)
     return state
 
 def save_key(k: str) -> str:
-    "💾🔑 A rune to bind the Eldritch Key."
     if not k or not k.strip(): return "🚫 Empty Key"
     with open(KEY_FILE, "w") as f: f.write(k.strip())
     return "🔑✅ Key Saved!"
 
 def get_key(k: str) -> str:
-    "📜🔑 A ritual to summon the Eldritch Key."
     k = k.strip() if k and k.strip() else (open(KEY_FILE).read().strip() if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""))
     if not k: raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required.")
     o.api_key = k
     return k
 
+def invoke_oracle(scribe_key: str, model_key: str, system_prompt: str, user_content: list, history: list):
+    get_key(scribe_key)
+    model_name = MODELS.get(model_key, "gpt-4o")
+    messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
+    try:
+        prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
+        history.append({"role": "user", "content": "..."})
+        history.append({"role": "assistant", "content": ""})
+        for chunk in prophecy:
+            if chunk.choices[0].delta.content:
+                history[-1]['content'] += chunk.choices[0].delta.content
+                yield history
+    except Exception as e:
+        yield history + [{"role": "assistant", "content": f"🧙‍♂️🔮 A magical disturbance occurred: {str(e)}"}]
+
+def handle_text_submission(api_key, model, prompt, history):
+    """A clear path for text quests to the Oracle."""
+    yield from invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], history)
+
 # --- Image & Audio Streaming Functions ---
 
 def transform_cv2(frame: np.ndarray, transform: str):
     """Applies a magical filter to a single frame from a webcam stream."""
+    if frame is None: return None
     if transform == "cartoon":
         img_color = cv2.pyrDown(cv2.pyrDown(frame))
         for _ in range(6):
             img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
         img_color = cv2.pyrUp(cv2.pyrUp(img_color))
         img_edges = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
-        img_edges = cv2.adaptiveThreshold(
-            cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C,
-            cv2.THRESH_BINARY, 9, 2)
+        img_edges = cv2.adaptiveThreshold(cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 2)
         img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
         return cv2.bitwise_and(img_color, img_edges)
     elif transform == "edges":
@@ -106,34 +118,27 @@ def transform_cv2(frame: np.ndarray, transform: str):
         return np.flipud(frame)
     return frame
 
-def transcribe_streaming(audio_chunk, history_state):
+def transcribe_streaming(api_key, audio_chunk, history_state):
     """Transcribes a chunk of audio, keeping context from previous chunks."""
     if audio_chunk is None:
-        return history_state, ""
-
-    # In a real scenario, you would use a streaming-capable ASR model.
-    # Here, we simulate it by transcribing each chunk individually.
-    # This is a placeholder for a more complex implementation.
-    get_key(os.getenv("OPENAI_KEY", "")) # Ensure API key is set
-
-    # Save chunk to a temporary file to use with OpenAI API
-    temp_wav_path = "temp_chunk.wav"
+        return history_state, history_state
+    get_key(api_key)
     sample_rate, data = audio_chunk
-    import soundfile as sf
+    temp_wav_path = f"temp_chunk_{hash(data.tobytes())}.wav"
     sf.write(temp_wav_path, data, sample_rate)
-
     try:
         with open(temp_wav_path, "rb") as audio_file:
             transcript = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
         new_text = transcript.text
     except Exception as e:
         print(f"Transcription error: {e}")
-        new_text = "(...)"
-
+        new_text = ""
+    finally:
+        if os.path.exists(temp_wav_path):
+            os.remove(temp_wav_path)
     history_state += new_text + " "
     return history_state, history_state
 
-# --- Other Functions (TTS, etc.) ---
 def generate_speech(api_key, tts_model, voice, text, language_key, format, progress=gr.Progress()):
     get_key(api_key)
     language = LANGUAGES.get(language_key, "English")
@@ -174,8 +179,7 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
     with gr.Tabs():
         with gr.TabItem("💬 Chat"):
             text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
-            # This is a simplified invoke_oracle for text-only chat
-            text_event = text_prompt.submit(fn=lambda k, m, p, h: invoke_oracle(k, m, "You are a helpful AI.", [{"type": "text", "text": p}], h), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
+            text_event = text_prompt.submit(fn=handle_text_submission, inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
 
         with gr.TabItem("🖼️ Streaming Image"):
            gr.Markdown(H2.format("Live Image Enchantments"))
@@ -191,16 +195,17 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
             mic_input = gr.Audio(sources="microphone", streaming=True)
             transcript_output = gr.Textbox(label="Transcript", interactive=False)
             transcript_state = gr.State(value="")
-            mic_input.stream(transcribe_streaming, [mic_input, transcript_state], [transcript_state, transcript_output], time_limit=20, stream_every=1)
+            mic_input.stream(transcribe_streaming, [api_key_box, mic_input, transcript_state], [transcript_state, transcript_output], time_limit=30, stream_every=2)
 
-        with gr.TabItem("👁️ Object Detection"):
-            gr.Markdown(H2.format("Live Scrying with YOLOv10"))
-            gr.HTML("<h3 style='text-align: center'>Requires a separate inference server for YOLOv10. This is a UI placeholder.</h3>")
+        with gr.TabItem("👁️ Object Detection (WebRTC)"):
+            gr.Markdown(H2.format("Live Scrying Spell"))
+            gr.HTML("<h3 style='text-align: center'>NOTE: This is a UI placeholder. A separate inference server for the YOLO model is required for this to function.</h3>")
             with gr.Column(elem_classes=["my-column"]):
                 with gr.Group(elem_classes=["my-group"]):
                     webrtc_stream = WebRTC(label="Stream", rtc_configuration=RTC_CONFIGURATION)
                     conf_threshold = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, step=0.05, value=0.30)
-                    # Placeholder for the actual stream event handler which would call the YOLOv10 model
+                    # Placeholder for the actual stream event handler which would call a loaded YOLOv10 model
+                    # def detection_placeholder(image, conf): return image
                     # webrtc_stream.stream(fn=detection_placeholder, inputs=[webrtc_stream, conf_threshold], outputs=[webrtc_stream], time_limit=10)
 
         with gr.TabItem("🔊 Speech Synthesis"):
@@ -227,7 +232,4 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
     text_event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
 
 if __name__ == "__main__":
-    # A placeholder function for the YOLOv10 detection since we don't have the model loaded here.
-    def detection_placeholder(image, conf):
-        return image # Just return the image as is.
     demo.launch(share=True, debug=True)
 