awacke1 commited on
Commit
9675144
·
verified ·
1 Parent(s): 45461f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -64
app.py CHANGED
@@ -10,6 +10,7 @@ import requests
10
  import re
11
  from io import BytesIO
12
  from PIL import Image
 
13
 
14
  # 📜 CONFIG
15
  UI_TITLE = "✨🧙‍♂️🔮 GPT-4o Omni-Oracle"
@@ -17,14 +18,28 @@ KEY_FILE = "key.txt"
17
  STATE_FILE = "app_state.json"
18
  MODELS = {
19
  "GPT-4o ✨": "gpt-4o",
 
 
 
 
 
 
20
  "GPT-4 Turbo 🚀": "gpt-4-turbo",
21
  "GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
22
  }
 
 
 
 
 
 
 
 
 
23
 
24
  # 🎨 STYLE
25
  H1 = "# <font size='7'>{0}</font>"
26
  H2 = "## <font size='6'>{0}</font>"
27
- BTN_STYLE = "<font size='5'>{0}</font>"
28
 
29
  # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
30
  def save_state(data: dict):
@@ -39,7 +54,7 @@ def load_state() -> dict:
39
  try:
40
  return json.load(f)
41
  except json.JSONDecodeError:
42
- return {} # Return empty if scroll is corrupted
43
  return {}
44
 
45
  def update_and_save(key: str, value, state: dict):
@@ -49,75 +64,53 @@ def update_and_save(key: str, value, state: dict):
49
  return state
50
 
51
  def save_key(k: str) -> str:
52
- "💾🔑 A rune to bind the Eldritch Key to the physical realm (disk)."
53
  if not k or not k.strip(): return "🚫 Empty Key"
54
  with open(KEY_FILE, "w") as f: f.write(k.strip())
55
  return "🔑✅ Key Saved!"
56
 
57
  def get_key(k: str) -> str:
58
- "📜🔑 A ritual to summon the Eldritch Key, prioritizing the user's offering, then the bound key, then one from the environment."
59
  k = k.strip() if k and k.strip() else (open(KEY_FILE).read().strip() if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""))
60
- if not k: raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required to commune with the Oracles.")
61
  o.api_key = k
62
  return k
63
 
64
  def file_to_base64(file_path):
65
- """Encodes a file into a base64 string for embedding in API calls."""
66
  with open(file_path, "rb") as f:
67
  return base64.b64encode(f.read()).decode('utf-8')
68
 
69
  def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
70
- """
71
- A universal pact with any Oracle. It can perceive text, images, and the echoes of past conversations.
72
- """
73
  get_key(scribe_key)
74
-
75
  messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
76
-
77
  try:
78
  prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
79
-
80
  history.append({"role": "user", "content": "..."})
81
  history.append({"role": "assistant", "content": ""})
82
-
83
  for chunk in prophecy:
84
  if chunk.choices[0].delta.content:
85
  history[-1]['content'] += chunk.choices[0].delta.content
86
  yield history
87
  except Exception as e:
88
- error_message = f"🧙‍♂️🔮 A magical disturbance occurred: {str(e)}"
89
- yield history + [{"role": "assistant", "content": error_message}]
90
 
91
  # --- Modality-Specific Summoning Rituals ---
92
 
93
  def summon_vision_from_image(api_key, model, prompt, image_path, history):
94
- "A ritual to grant sight to the Oracle, allowing it to perceive an image."
95
- if image_path is None:
96
- raise gr.Error("An image must be provided to summon vision.")
97
-
98
  b64_image = file_to_base64(image_path.name)
99
- user_content = [
100
- {"type": "text", "text": prompt},
101
- {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
102
- ]
103
- system_prompt = "You are a helpful assistant that analyzes images. Respond in Markdown."
104
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
105
 
106
  def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
107
- "A rite to translate spoken words from an audio file into text, then seek the Oracle's wisdom upon it."
108
- if audio_path is None:
109
- raise gr.Error("An audio file must be provided to summon its echo.")
110
  get_key(api_key)
111
  with open(audio_path.name, "rb") as audio_file:
112
  transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
113
-
114
  full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
115
- user_content = [{"type": "text", "text": full_prompt}]
116
- system_prompt = "You are a helpful assistant analyzing an audio transcript. Summarize it and answer questions. Respond in Markdown."
117
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
118
 
119
  def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
120
- "Extracts the written word from PDF or text files to present to the Oracle."
121
  if file_path is None: raise gr.Error("A file must be provided.")
122
  text_content = ""
123
  if file_path.name.lower().endswith('.pdf'):
@@ -126,18 +119,13 @@ def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
126
  else:
127
  with open(file_path.name, 'r', encoding='utf-8') as f:
128
  text_content = f.read()
129
-
130
  full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
131
- user_content = [{"type": "text", "text": full_prompt}]
132
- system_prompt = "You are a helpful assistant analyzing a document. Summarize it and answer questions. Respond in Markdown."
133
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
134
 
135
  def summon_chronicle_from_video(api_key, model, prompt, video_path, history, progress=gr.Progress()):
136
- "A grand ritual to divine meaning from a video's moving pictures and spoken words."
137
  if video_path is None: raise gr.Error("A video must be provided.")
138
  get_key(api_key)
139
  base_video_path, _ = os.path.splitext(video_path.name)
140
-
141
  progress(0.1, desc="🔮 Extracting Audio...")
142
  audio_path = f"{base_video_path}.mp3"
143
  transcript_text = "No audio found."
@@ -149,7 +137,6 @@ def summon_chronicle_from_video(api_key, model, prompt, video_path, history, pro
149
  transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
150
  except Exception as e:
151
  print(f"Audio failed: {e}")
152
-
153
  progress(0.6, desc="🖼️ Sampling Frames...")
154
  base64Frames = []
155
  video = cv2.VideoCapture(video_path.name)
@@ -163,21 +150,52 @@ def summon_chronicle_from_video(api_key, model, prompt, video_path, history, pro
163
  _, buffer = cv2.imencode(".jpg", frame)
164
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
165
  video.release()
166
-
167
  progress(0.8, desc="🌀 Consulting Oracle...")
168
- user_content = [
169
- {"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"},
170
- *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
171
- ]
172
- system_prompt = "You are a helpful video analyst. Use the frames and transcript to summarize and answer questions. Respond in Markdown."
173
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  # 🔮 UI
176
  with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
177
- # --- Load State & Create State Holder ---
178
  initial_state = load_state()
179
  app_state = gr.State(initial_state)
180
-
181
  gr.Markdown(H1.format(UI_TITLE))
182
 
183
  with gr.Accordion("🔑 Eldritch Key & Oracle Selection", open=True):
@@ -193,11 +211,7 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
193
  with gr.Tabs():
194
  with gr.TabItem("💬 Chat"):
195
  text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
196
- text_event = text_prompt.submit(
197
- fn=lambda api_key, model, prompt, hist: invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], hist),
198
- inputs=[api_key_box, model_selector, text_prompt, chatbot],
199
- outputs=chatbot
200
- )
201
 
202
  with gr.TabItem("🖼️ Image"):
203
  with gr.Row():
@@ -225,20 +239,33 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
225
  doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
226
  doc_btn = gr.Button("📖 Summon Wisdom")
227
  doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  # --- Autosave Event Listeners ---
230
- # Simple text/dropdown inputs
231
- api_key_box.change(update_and_save, [gr.State('api_key'), api_key_box, app_state], app_state)
232
- model_selector.change(update_and_save, [gr.State('model'), model_selector, app_state], app_state)
233
- text_prompt.change(update_and_save, [gr.State('text_prompt'), text_prompt, app_state], app_state)
234
- image_prompt.change(update_and_save, [gr.State('image_prompt'), image_prompt, app_state], app_state)
235
- audio_prompt.change(update_and_save, [gr.State('audio_prompt'), audio_prompt, app_state], app_state)
236
- video_prompt.change(update_and_save, [gr.State('video_prompt'), video_prompt, app_state], app_state)
237
- doc_prompt.change(update_and_save, [gr.State('doc_prompt'), doc_prompt, app_state], app_state)
238
-
239
- # Chatbot history, saved after each interaction
240
  for event in [text_event, image_event, audio_event, video_event, doc_event]:
241
  event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
242
 
243
  if __name__ == "__main__":
244
  demo.launch(share=True, debug=True)
 
 
10
  import re
11
  from io import BytesIO
12
  from PIL import Image
13
+ from pathlib import Path
14
 
15
  # 📜 CONFIG
16
  UI_TITLE = "✨🧙‍♂️🔮 GPT-4o Omni-Oracle"
 
18
  STATE_FILE = "app_state.json"
19
  MODELS = {
20
  "GPT-4o ✨": "gpt-4o",
21
+ "o3 (Advanced Reasoning) �": "gpt-4-turbo", # Placeholder
22
+ "o4-mini (Fastest) ⚡": "gpt-4-turbo", # Placeholder
23
+ "o4-mini-high (Vision) 👁️‍🗨️": "gpt-4o", # Placeholder
24
+ "GPT-4.5 (Research) 🔬": "gpt-4-turbo-preview", # Placeholder
25
+ "GPT-4.1 (Analysis) 💻": "gpt-4-turbo", # Placeholder
26
+ "GPT-4.1-mini (Everyday) ☕": "gpt-4-turbo", # Placeholder
27
  "GPT-4 Turbo 🚀": "gpt-4-turbo",
28
  "GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
29
  }
30
+ VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer"]
31
+ TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"]
32
+ FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"]
33
+ LANGUAGES = {
34
+ "🇬🇧 English": "English", "🇨🇳 Chinese": "Chinese", "🇫🇷 French": "French", "🇩🇪 German": "German",
35
+ "🇮🇱 Hebrew": "Hebrew", "🇮🇳 Hindi": "Hindi", "🇯🇵 Japanese": "Japanese", "🇳🇿 Maori": "Maori",
36
+ "🇷🇺 Russian": "Russian", "🇪🇸 Spanish": "Spanish"
37
+ }
38
+
39
 
40
  # 🎨 STYLE
41
  H1 = "# <font size='7'>{0}</font>"
42
  H2 = "## <font size='6'>{0}</font>"
 
43
 
44
  # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
45
  def save_state(data: dict):
 
54
  try:
55
  return json.load(f)
56
  except json.JSONDecodeError:
57
+ return {}
58
  return {}
59
 
60
  def update_and_save(key: str, value, state: dict):
 
64
  return state
65
 
66
def save_key(k: str) -> str:
    """Persist the supplied OpenAI API key to KEY_FILE on disk.

    Returns a short status string suitable for display in the UI.
    """
    if k is None or not k.strip():
        return "🚫 Empty Key"
    with open(KEY_FILE, "w") as fh:
        fh.write(k.strip())
    return "🔑✅ Key Saved!"
71
 
72
def get_key(k: str) -> str:
    """Resolve the OpenAI API key and register it with the client.

    Priority: explicit argument > key stored in KEY_FILE > OPENAI_KEY env var.

    Raises:
        gr.Error: when no key can be found from any source.
    """
    k = (k or "").strip()
    if not k:
        if os.path.exists(KEY_FILE):
            # Use a context manager: the original `open(KEY_FILE).read()`
            # left the file handle to be closed only by the GC.
            with open(KEY_FILE) as fh:
                k = fh.read().strip()
        else:
            k = os.getenv("OPENAI_KEY", "")
    if not k:
        raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required.")
    o.api_key = k
    return k
78
 
79
def file_to_base64(file_path):
    """Read a file from disk and return its contents as a base64-encoded string."""
    with open(file_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
82
 
83
def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
    """Stream a chat completion from the chosen model, yielding the growing history.

    The user turn is recorded as a "..." placeholder (the real content may be a
    multimodal payload the chat widget cannot render); the assistant turn is
    filled in incrementally as chunks arrive. On failure, a single error turn
    is yielded without mutating the caller's history.
    """
    get_key(scribe_key)
    messages = history + [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    try:
        prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
        history.append({"role": "user", "content": "..."})
        history.append({"role": "assistant", "content": ""})
        for chunk in prophecy:
            delta = chunk.choices[0].delta.content
            if delta:
                history[-1]["content"] += delta
                yield history
    except Exception as e:
        yield history + [{"role": "assistant", "content": f"🧙‍♂️🔮 A magical disturbance occurred: {str(e)}"}]
 
96
 
97
  # --- Modality-Specific Summoning Rituals ---
98
 
99
def summon_vision_from_image(api_key, model, prompt, image_path, history):
    """Send an uploaded image plus a text prompt to a vision-capable model."""
    if image_path is None:
        raise gr.Error("An image must be provided.")
    encoded = file_to_base64(image_path.name)
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded}"}}
    yield from invoke_oracle(
        api_key,
        model,
        "You are an assistant that analyzes images. Respond in Markdown.",
        [text_part, image_part],
        history,
    )
 
 
 
 
104
 
105
def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
    """Transcribe an audio file with Whisper, then ask the model about the transcript."""
    if audio_path is None:
        raise gr.Error("An audio file must be provided.")
    get_key(api_key)
    with open(audio_path.name, "rb") as audio_file:
        transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
    full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
    payload = [{"type": "text", "text": full_prompt}]
    yield from invoke_oracle(
        api_key,
        model,
        "You analyze audio transcripts. Respond in Markdown.",
        payload,
        history,
    )
 
 
112
 
113
  def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
 
114
  if file_path is None: raise gr.Error("A file must be provided.")
115
  text_content = ""
116
  if file_path.name.lower().endswith('.pdf'):
 
119
  else:
120
  with open(file_path.name, 'r', encoding='utf-8') as f:
121
  text_content = f.read()
 
122
  full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
123
+ yield from invoke_oracle(api_key, model, "You analyze documents. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)
 
 
124
 
125
  def summon_chronicle_from_video(api_key, model, prompt, video_path, history, progress=gr.Progress()):
 
126
  if video_path is None: raise gr.Error("A video must be provided.")
127
  get_key(api_key)
128
  base_video_path, _ = os.path.splitext(video_path.name)
 
129
  progress(0.1, desc="🔮 Extracting Audio...")
130
  audio_path = f"{base_video_path}.mp3"
131
  transcript_text = "No audio found."
 
137
  transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
138
  except Exception as e:
139
  print(f"Audio failed: {e}")
 
140
  progress(0.6, desc="🖼️ Sampling Frames...")
141
  base64Frames = []
142
  video = cv2.VideoCapture(video_path.name)
 
150
  _, buffer = cv2.imencode(".jpg", frame)
151
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
152
  video.release()
 
153
  progress(0.8, desc="🌀 Consulting Oracle...")
154
+ user_content = [{"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"}, *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)]
155
+ yield from invoke_oracle(api_key, model, "You are a video analyst. Respond in Markdown.", user_content, history)
156
+
157
def generate_speech(api_key, tts_model, voice, text, language, format, progress=gr.Progress()):
    """Translate *text* to *language* (if not English) and synthesize it to audio.

    Args:
        api_key: OpenAI key (falls back to stored/env key via get_key).
        tts_model: TTS model name (e.g. "tts-1").
        voice: voice preset name.
        text: text to speak.
        language: target language name; "English" skips translation.
        format: audio container/format, also used as the file suffix.
        progress: Gradio progress reporter.

    Returns:
        (audio_file_path, translated_text)

    Raises:
        gr.Error: if translation or speech synthesis fails.
    """
    get_key(api_key)

    # Step 1: Translate the text if the language is not English.
    progress(0.2, desc=f"Translating to {language}...")
    translated_text = text
    if language != "English":
        try:
            response = o.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": f"You are a translator. Translate the following text to {language}. Output only the translated text."},
                    {"role": "user", "content": text},
                ],
                temperature=0,
            )
            translated_text = response.choices[0].message.content
        except Exception as e:
            raise gr.Error(f"Translation failed: {e}")

    # Step 2: Generate speech from the (possibly translated) text.
    progress(0.6, desc="Summoning voice...")
    # Use a unique temp file per call: the original always wrote to the same
    # "speech.<format>" path, so concurrent users clobbered each other's audio.
    import tempfile
    fd, speech_file_path = tempfile.mkstemp(prefix="speech_", suffix=f".{format}")
    os.close(fd)
    try:
        response = o.audio.speech.create(
            model=tts_model,
            voice=voice,
            input=translated_text,
            response_format=format,
        )
        response.stream_to_file(speech_file_path)
    except Exception as e:
        raise gr.Error(f"Speech generation failed: {e}")

    progress(1.0, desc="Voice summoned!")
    return str(speech_file_path), translated_text
194
 
195
  # 🔮 UI
196
  with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
 
197
  initial_state = load_state()
198
  app_state = gr.State(initial_state)
 
199
  gr.Markdown(H1.format(UI_TITLE))
200
 
201
  with gr.Accordion("🔑 Eldritch Key & Oracle Selection", open=True):
 
211
  with gr.Tabs():
212
  with gr.TabItem("💬 Chat"):
213
  text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
214
+ text_event = text_prompt.submit(fn=lambda api_key, model, prompt, hist: invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], hist), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
 
 
 
 
215
 
216
  with gr.TabItem("🖼️ Image"):
217
  with gr.Row():
 
239
  doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
240
  doc_btn = gr.Button("📖 Summon Wisdom")
241
  doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)
242
+
243
+ with gr.TabItem("🔊 Speech Synthesis"):
244
+ gr.Markdown(H2.format("Give Voice to Words"))
245
+ tts_language = gr.Radio(choices=list(LANGUAGES.keys()), label="🈯 Language", value=initial_state.get('tts_language', "🇬🇧 English"))
246
+ with gr.Row():
247
+ tts_voice = gr.Dropdown(choices=VOICES, label="🗣️ Voice", value=initial_state.get('tts_voice', "alloy"))
248
+ tts_model_select = gr.Dropdown(choices=TTS_MODELS, label="🧠 TTS Model", value=initial_state.get('tts_model', "gpt-4o-mini-tts"))
249
+ tts_format = gr.Dropdown(choices=FORMATS, label="📦 Format", value=initial_state.get('tts_format', "mp3"))
250
+ tts_text_input = gr.Textbox(label="📜 Text to Speak", lines=4, placeholder="Enter text here...", value=initial_state.get('tts_text', ''))
251
+ tts_btn = gr.Button("🔊 Generate Speech")
252
+ tts_translated_text = gr.Textbox(label="Translated Text (Output)", interactive=False)
253
+ tts_audio_output = gr.Audio(label="🎧 Spoken Word", type="filepath")
254
+ tts_event = tts_btn.click(generate_speech, [api_key_box, tts_model_select, tts_voice, tts_text_input, tts_language, tts_format], [tts_audio_output, tts_translated_text])
255
 
256
  # --- Autosave Event Listeners ---
257
+ components_to_save = {
258
+ 'api_key': api_key_box, 'model': model_selector, 'text_prompt': text_prompt,
259
+ 'image_prompt': image_prompt, 'audio_prompt': audio_prompt, 'video_prompt': video_prompt,
260
+ 'doc_prompt': doc_prompt, 'tts_language': tts_language, 'tts_voice': tts_voice,
261
+ 'tts_model': tts_model_select, 'tts_format': tts_format, 'tts_text': tts_text_input
262
+ }
263
+ for key, component in components_to_save.items():
264
+ component.change(update_and_save, [gr.State(key), component, app_state], app_state)
265
+
 
266
  for event in [text_event, image_event, audio_event, video_event, doc_event]:
267
  event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
268
 
269
  if __name__ == "__main__":
270
  demo.launch(share=True, debug=True)
271
+