awacke1 committed
Commit 60e3497 · verified · 1 Parent(s): ed00060

Update app.py

Files changed (1):
app.py (+94 -131)
app.py CHANGED
@@ -11,6 +11,8 @@ import re
 from io import BytesIO
 from PIL import Image
 from pathlib import Path
+import numpy as np
+from gradio_webrtc import WebRTC

 # 📜 CONFIG
 UI_TITLE = "✨🧙‍♂️🔮 GPT-4o Omni-Oracle"
@@ -18,7 +20,7 @@ KEY_FILE = "key.txt"
 STATE_FILE = "app_state.json"
 MODELS = {
     "GPT-4o ✨": "gpt-4o",
-    "o3 (Advanced Reasoning) ": "gpt-4-turbo", # Placeholder
+    "o3 (Advanced Reasoning) 🧠": "gpt-4-turbo", # Placeholder
     "o4-mini (Fastest) ⚡": "gpt-4-turbo", # Placeholder
     "o4-mini-high (Vision) 👁️‍🗨️": "gpt-4o", # Placeholder
     "GPT-4.5 (Research) 🔬": "gpt-4-turbo-preview", # Placeholder
@@ -35,11 +37,18 @@ LANGUAGES = {
     "🇮🇱 Hebrew": "Hebrew", "🇮🇳 Hindi": "Hindi", "🇯🇵 Japanese": "Japanese", "🇳🇿 Maori": "Maori",
     "🇷🇺 Russian": "Russian", "🇪🇸 Spanish": "Spanish"
 }
-
+# For WebRTC - Replace with your own if deploying
+RTC_CONFIGURATION = {
+    "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
+}

 # 🎨 STYLE
 H1 = "# <font size='7'>{0}</font>"
 H2 = "## <font size='6'>{0}</font>"
+CSS = """
+.my-group {max-width: 500px !important; max-height: 500px !important;}
+.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}
+"""

 # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
 def save_state(data: dict):
@@ -76,124 +85,78 @@ def get_key(k: str) -> str:
     o.api_key = k
     return k

-def file_to_base64(file_path):
-    with open(file_path, "rb") as f:
-        return base64.b64encode(f.read()).decode('utf-8')
+# --- Image & Audio Streaming Functions ---
+
+def transform_cv2(frame: np.ndarray, transform: str):
+    """Applies a magical filter to a single frame from a webcam stream."""
+    if transform == "cartoon":
+        img_color = cv2.pyrDown(cv2.pyrDown(frame))
+        for _ in range(6):
+            img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
+        img_color = cv2.pyrUp(cv2.pyrUp(img_color))
+        img_edges = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
+        img_edges = cv2.adaptiveThreshold(
+            cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C,
+            cv2.THRESH_BINARY, 9, 2)
+        img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
+        return cv2.bitwise_and(img_color, img_edges)
+    elif transform == "edges":
+        return cv2.cvtColor(cv2.Canny(frame, 100, 200), cv2.COLOR_GRAY2BGR)
+    elif transform == "flip":
+        return np.flipud(frame)
+    return frame
+
+def transcribe_streaming(audio_chunk, history_state):
+    """Transcribes a chunk of audio, keeping context from previous chunks."""
+    if audio_chunk is None:
+        return history_state, ""
+
+    # In a real scenario, you would use a streaming-capable ASR model.
+    # Here, we simulate it by transcribing each chunk individually.
+    # This is a placeholder for a more complex implementation.
+    get_key(os.getenv("OPENAI_KEY", "")) # Ensure API key is set
+
+    # Save chunk to a temporary file to use with OpenAI API
+    temp_wav_path = "temp_chunk.wav"
+    sample_rate, data = audio_chunk
+    import soundfile as sf
+    sf.write(temp_wav_path, data, sample_rate)

-def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
-    get_key(scribe_key)
-    messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
     try:
-        prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
-        history.append({"role": "user", "content": "..."})
-        history.append({"role": "assistant", "content": ""})
-        for chunk in prophecy:
-            if chunk.choices[0].delta.content:
-                history[-1]['content'] += chunk.choices[0].delta.content
-                yield history
+        with open(temp_wav_path, "rb") as audio_file:
+            transcript = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
+        new_text = transcript.text
     except Exception as e:
-        yield history + [{"role": "assistant", "content": f"🧙‍♂️🔮 A magical disturbance occurred: {str(e)}"}]
-
-# --- Modality-Specific Summoning Rituals ---
-
-def summon_vision_from_image(api_key, model, prompt, image_path, history):
-    if image_path is None: raise gr.Error("An image must be provided.")
-    b64_image = file_to_base64(image_path.name)
-    user_content = [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}]
-    yield from invoke_oracle(api_key, model, "You are an assistant that analyzes images. Respond in Markdown.", user_content, history)
-
-def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
-    if audio_path is None: raise gr.Error("An audio file must be provided.")
-    get_key(api_key)
-    with open(audio_path.name, "rb") as audio_file:
-        transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
-    full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
-    yield from invoke_oracle(api_key, model, "You analyze audio transcripts. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)
-
-def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
-    if file_path is None: raise gr.Error("A file must be provided.")
-    text_content = ""
-    if file_path.name.lower().endswith('.pdf'):
-        with fitz.open(file_path.name) as doc:
-            text_content = "".join(page.get_text() for page in doc)
-    else:
-        with open(file_path.name, 'r', encoding='utf-8') as f:
-            text_content = f.read()
-    full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
-    yield from invoke_oracle(api_key, model, "You analyze documents. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)
+        print(f"Transcription error: {e}")
+        new_text = "(...)"

-def summon_chronicle_from_video(api_key, model, prompt, video_path, history, progress=gr.Progress()):
-    if video_path is None: raise gr.Error("A video must be provided.")
-    get_key(api_key)
-    base_video_path, _ = os.path.splitext(video_path.name)
-    progress(0.1, desc="🔮 Extracting Audio...")
-    audio_path = f"{base_video_path}.mp3"
-    transcript_text = "No audio found."
-    try:
-        with VideoFileClip(video_path.name) as clip:
-            clip.audio.write_audiofile(audio_path, bitrate="32k", logger=None)
-        progress(0.3, desc="🎤 Transcribing Audio...")
-        with open(audio_path, "rb") as audio_file:
-            transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
-    except Exception as e:
-        print(f"Audio failed: {e}")
-    progress(0.6, desc="🖼️ Sampling Frames...")
-    base64Frames = []
-    video = cv2.VideoCapture(video_path.name)
-    total_frames, fps = int(video.get(cv2.CAP_PROP_FRAME_COUNT)), video.get(cv2.CAP_PROP_FPS)
-    frames_to_skip = int(fps * 2)
-    for curr_frame in range(0, total_frames - 1, frames_to_skip):
-        if len(base64Frames) >= 10: break
-        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
-        success, frame = video.read()
-        if not success: break
-        _, buffer = cv2.imencode(".jpg", frame)
-        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
-    video.release()
-    progress(0.8, desc="🌀 Consulting Oracle...")
-    user_content = [{"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"}, *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)]
-    yield from invoke_oracle(api_key, model, "You are a video analyst. Respond in Markdown.", user_content, history)
+    history_state += new_text + " "
+    return history_state, history_state

-def generate_speech(api_key, tts_model, voice, text, language, format, progress=gr.Progress()):
-    """A ritual to give voice to the written word, in any tongue."""
+# --- Other Functions (TTS, etc.) ---
+def generate_speech(api_key, tts_model, voice, text, language_key, format, progress=gr.Progress()):
     get_key(api_key)
-
-    # Step 1: Translate the text if the language is not English
+    language = LANGUAGES.get(language_key, "English")
     progress(0.2, desc=f"Translating to {language}...")
     translated_text = text
     if language != "English":
         try:
-            response = o.chat.completions.create(
-                model="gpt-4o",
-                messages=[
-                    {"role": "system", "content": f"You are a translator. Translate the following text to {language}. Output only the translated text."},
-                    {"role": "user", "content": text}
-                ],
-                temperature=0
-            )
+            response = o.chat.completions.create(model="gpt-4o", messages=[{"role": "system", "content": f"Translate to {language}. Output only the translation."}, {"role": "user", "content": text}], temperature=0)
             translated_text = response.choices[0].message.content
         except Exception as e:
             raise gr.Error(f"Translation failed: {e}")
-
-    # Step 2: Generate speech from the (possibly translated) text
     progress(0.6, desc="Summoning voice...")
     speech_file_path = Path(__file__).parent / f"speech.{format}"
     try:
-        response = o.audio.speech.create(
-            model=tts_model,
-            voice=voice,
-            input=translated_text,
-            response_format=format
-        )
+        response = o.audio.speech.create(model=tts_model, voice=voice, input=translated_text, response_format=format)
         response.stream_to_file(speech_file_path)
     except Exception as e:
         raise gr.Error(f"Speech generation failed: {e}")
-
     progress(1.0, desc="Voice summoned!")
     return str(speech_file_path), translated_text

 # 🔮 UI
-with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
+with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange"), css=CSS) as demo:
     initial_state = load_state()
     app_state = gr.State(initial_state)
     gr.Markdown(H1.format(UI_TITLE))
@@ -206,40 +169,40 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
         model_selector = gr.Dropdown(choices=list(MODELS.keys()), label="🔮 Oracle", value=initial_state.get('model', "GPT-4o ✨"))
         save_btn.click(save_key, inputs=api_key_box, outputs=status_txt)

-    chatbot = gr.Chatbot(height=500, label="📜 Scroll of Conversation", type='messages', value=initial_state.get('chatbot', []))
+    chatbot = gr.Chatbot(height=400, label="📜 Scroll of Conversation", type='messages', value=initial_state.get('chatbot', []))

     with gr.Tabs():
         with gr.TabItem("💬 Chat"):
             text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
-            text_event = text_prompt.submit(fn=lambda api_key, model, prompt, hist: invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], hist), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
-
-        with gr.TabItem("🖼️ Image"):
+            # This is a simplified invoke_oracle for text-only chat
+            text_event = text_prompt.submit(fn=lambda k, m, p, h: invoke_oracle(k, m, "You are a helpful AI.", [{"type": "text", "text": p}], h), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
+
+        with gr.TabItem("🖼️ Streaming Image"):
+            gr.Markdown(H2.format("Live Image Enchantments"))
+            with gr.Column(elem_classes=["my-column"]):
+                with gr.Group(elem_classes=["my-group"]):
+                    transform_filter = gr.Dropdown(choices=["cartoon", "edges", "flip"], value="flip", label="Transformation")
+                    streaming_image = gr.Image(sources=["webcam"], type="numpy", streaming=True)
+            streaming_image.stream(transform_cv2, [streaming_image, transform_filter], streaming_image, time_limit=30, stream_every=0.1)
+
+        with gr.TabItem("🎤 Streaming Audio"):
+            gr.Markdown(H2.format("Real-time Transcription Rite"))
             with gr.Row():
-                image_input = gr.File(label="Upload Image", type="file")
-                image_output = gr.Image(label="Your Image", type="filepath", interactive=False)
-            image_prompt = gr.Textbox(label="Image Prompt:", value=initial_state.get('image_prompt', "What is in this image?"))
-            image_btn = gr.Button("👁️ Summon Vision")
-            image_input.change(lambda x: x, inputs=image_input, outputs=image_output)
-            image_event = image_btn.click(summon_vision_from_image, [api_key_box, model_selector, image_prompt, image_input, chatbot], chatbot)
+                mic_input = gr.Audio(sources="microphone", streaming=True)
+                transcript_output = gr.Textbox(label="Transcript", interactive=False)
+            transcript_state = gr.State(value="")
+            mic_input.stream(transcribe_streaming, [mic_input, transcript_state], [transcript_state, transcript_output], time_limit=20, stream_every=1)
+
+        with gr.TabItem("👁️ Object Detection"):
+            gr.Markdown(H2.format("Live Scrying with YOLOv10"))
+            gr.HTML("<h3 style='text-align: center'>Requires a separate inference server for YOLOv10. This is a UI placeholder.</h3>")
+            with gr.Column(elem_classes=["my-column"]):
+                with gr.Group(elem_classes=["my-group"]):
+                    webrtc_stream = WebRTC(label="Stream", rtc_configuration=RTC_CONFIGURATION)
+                    conf_threshold = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, step=0.05, value=0.30)
+            # Placeholder for the actual stream event handler which would call the YOLOv10 model
+            # webrtc_stream.stream(fn=detection_placeholder, inputs=[webrtc_stream, conf_threshold], outputs=[webrtc_stream], time_limit=10)

-        with gr.TabItem("🎤 Audio"):
-            audio_input = gr.File(label="Upload Audio", type="file")
-            audio_prompt = gr.Textbox(label="Audio Prompt:", value=initial_state.get('audio_prompt', "Summarize this audio."))
-            audio_btn = gr.Button("🗣️ Summon Echo")
-            audio_event = audio_btn.click(summon_echo_from_audio, [api_key_box, model_selector, audio_prompt, audio_input, chatbot], chatbot)
-
-        with gr.TabItem("🎥 Video"):
-            video_input = gr.File(label="Upload Video", type="file")
-            video_prompt = gr.Textbox(label="Video Prompt:", value=initial_state.get('video_prompt', "Summarize this video."))
-            video_btn = gr.Button("🎬 Summon Chronicle")
-            video_event = video_btn.click(summon_chronicle_from_video, [api_key_box, model_selector, video_prompt, video_input, chatbot], chatbot)
-
-        with gr.TabItem("📄 Document"):
-            doc_input = gr.File(label="Upload PDF or TXT", type="file")
-            doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
-            doc_btn = gr.Button("📖 Summon Wisdom")
-            doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)
-
         with gr.TabItem("🔊 Speech Synthesis"):
             gr.Markdown(H2.format("Give Voice to Words"))
             tts_language = gr.Radio(choices=list(LANGUAGES.keys()), label="🈯 Language", value=initial_state.get('tts_language', "🇬🇧 English"))
@@ -256,15 +219,15 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
     # --- Autosave Event Listeners ---
     components_to_save = {
         'api_key': api_key_box, 'model': model_selector, 'text_prompt': text_prompt,
-        'image_prompt': image_prompt, 'audio_prompt': audio_prompt, 'video_prompt': video_prompt,
-        'doc_prompt': doc_prompt, 'tts_language': tts_language, 'tts_voice': tts_voice,
+        'tts_language': tts_language, 'tts_voice': tts_voice,
         'tts_model': tts_model_select, 'tts_format': tts_format, 'tts_text': tts_text_input
     }
     for key, component in components_to_save.items():
         component.change(update_and_save, [gr.State(key), component, app_state], app_state)
-
-    for event in [text_event, image_event, audio_event, video_event, doc_event]:
-        event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
+    text_event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)

 if __name__ == "__main__":
-    demo.launch(share=True, debug=True)
+    # A placeholder function for the YOLOv10 detection since we don't have the model loaded here.
+    def detection_placeholder(image, conf):
+        return image # Just return the image as is.
+    demo.launch(share=True, debug=True)
+ demo.launch(share=True, debug=True)