awacke1 commited on
Commit
9675144
·
verified ·
1 Parent(s): 45461f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -64
app.py CHANGED
@@ -10,6 +10,7 @@ import requests
10
  import re
11
  from io import BytesIO
12
  from PIL import Image
 
13
 
14
  # 📜 CONFIG
15
  UI_TITLE = "✨🧙‍♂️🔮 GPT-4o Omni-Oracle"
@@ -17,14 +18,28 @@ KEY_FILE = "key.txt"
17
  STATE_FILE = "app_state.json"
18
  MODELS = {
19
  "GPT-4o ✨": "gpt-4o",
 
 
 
 
 
 
20
  "GPT-4 Turbo 🚀": "gpt-4-turbo",
21
  "GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
22
  }
 
 
 
 
 
 
 
 
 
23
 
24
  # 🎨 STYLE
25
  H1 = "# <font size='7'>{0}</font>"
26
  H2 = "## <font size='6'>{0}</font>"
27
- BTN_STYLE = "<font size='5'>{0}</font>"
28
 
29
  # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
30
  def save_state(data: dict):
@@ -39,7 +54,7 @@ def load_state() -> dict:
39
  try:
40
  return json.load(f)
41
  except json.JSONDecodeError:
42
- return {} # Return empty if scroll is corrupted
43
  return {}
44
 
45
  def update_and_save(key: str, value, state: dict):
@@ -49,75 +64,53 @@ def update_and_save(key: str, value, state: dict):
49
  return state
50
 
51
  def save_key(k: str) -> str:
52
- "💾🔑 A rune to bind the Eldritch Key to the physical realm (disk)."
53
  if not k or not k.strip(): return "🚫 Empty Key"
54
  with open(KEY_FILE, "w") as f: f.write(k.strip())
55
  return "🔑✅ Key Saved!"
56
 
57
  def get_key(k: str) -> str:
58
- "📜🔑 A ritual to summon the Eldritch Key, prioritizing the user's offering, then the bound key, then one from the environment."
59
  k = k.strip() if k and k.strip() else (open(KEY_FILE).read().strip() if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""))
60
- if not k: raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required to commune with the Oracles.")
61
  o.api_key = k
62
  return k
63
 
64
  def file_to_base64(file_path):
65
- """Encodes a file into a base64 string for embedding in API calls."""
66
  with open(file_path, "rb") as f:
67
  return base64.b64encode(f.read()).decode('utf-8')
68
 
69
  def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
70
- """
71
- A universal pact with any Oracle. It can perceive text, images, and the echoes of past conversations.
72
- """
73
  get_key(scribe_key)
74
-
75
  messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
76
-
77
  try:
78
  prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
79
-
80
  history.append({"role": "user", "content": "..."})
81
  history.append({"role": "assistant", "content": ""})
82
-
83
  for chunk in prophecy:
84
  if chunk.choices[0].delta.content:
85
  history[-1]['content'] += chunk.choices[0].delta.content
86
  yield history
87
  except Exception as e:
88
- error_message = f"🧙‍♂️🔮 A magical disturbance occurred: {str(e)}"
89
- yield history + [{"role": "assistant", "content": error_message}]
90
 
91
  # --- Modality-Specific Summoning Rituals ---
92
 
93
  def summon_vision_from_image(api_key, model, prompt, image_path, history):
94
- "A ritual to grant sight to the Oracle, allowing it to perceive an image."
95
- if image_path is None:
96
- raise gr.Error("An image must be provided to summon vision.")
97
-
98
  b64_image = file_to_base64(image_path.name)
99
- user_content = [
100
- {"type": "text", "text": prompt},
101
- {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
102
- ]
103
- system_prompt = "You are a helpful assistant that analyzes images. Respond in Markdown."
104
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
105
 
106
  def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
107
- "A rite to translate spoken words from an audio file into text, then seek the Oracle's wisdom upon it."
108
- if audio_path is None:
109
- raise gr.Error("An audio file must be provided to summon its echo.")
110
  get_key(api_key)
111
  with open(audio_path.name, "rb") as audio_file:
112
  transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
113
-
114
  full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
115
- user_content = [{"type": "text", "text": full_prompt}]
116
- system_prompt = "You are a helpful assistant analyzing an audio transcript. Summarize it and answer questions. Respond in Markdown."
117
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
118
 
119
  def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
120
- "Extracts the written word from PDF or text files to present to the Oracle."
121
  if file_path is None: raise gr.Error("A file must be provided.")
122
  text_content = ""
123
  if file_path.name.lower().endswith('.pdf'):
@@ -126,18 +119,13 @@ def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
126
  else:
127
  with open(file_path.name, 'r', encoding='utf-8') as f:
128
  text_content = f.read()
129
-
130
  full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
131
- user_content = [{"type": "text", "text": full_prompt}]
132
- system_prompt = "You are a helpful assistant analyzing a document. Summarize it and answer questions. Respond in Markdown."
133
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
134
 
135
  def summon_chronicle_from_video(api_key, model, prompt, video_path, history, progress=gr.Progress()):
136
- "A grand ritual to divine meaning from a video's moving pictures and spoken words."
137
  if video_path is None: raise gr.Error("A video must be provided.")
138
  get_key(api_key)
139
  base_video_path, _ = os.path.splitext(video_path.name)
140
-
141
  progress(0.1, desc="🔮 Extracting Audio...")
142
  audio_path = f"{base_video_path}.mp3"
143
  transcript_text = "No audio found."
@@ -149,7 +137,6 @@ def summon_chronicle_from_video(api_key, model, prompt, video_path, history, pro
149
  transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
150
  except Exception as e:
151
  print(f"Audio failed: {e}")
152
-
153
  progress(0.6, desc="🖼️ Sampling Frames...")
154
  base64Frames = []
155
  video = cv2.VideoCapture(video_path.name)
@@ -163,21 +150,52 @@ def summon_chronicle_from_video(api_key, model, prompt, video_path, history, pro
163
  _, buffer = cv2.imencode(".jpg", frame)
164
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
165
  video.release()
166
-
167
  progress(0.8, desc="🌀 Consulting Oracle...")
168
- user_content = [
169
- {"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"},
170
- *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
171
- ]
172
- system_prompt = "You are a helpful video analyst. Use the frames and transcript to summarize and answer questions. Respond in Markdown."
173
- yield from invoke_oracle(api_key, model, system_prompt, user_content, history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  # 🔮 UI
176
  with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
177
- # --- Load State & Create State Holder ---
178
  initial_state = load_state()
179
  app_state = gr.State(initial_state)
180
-
181
  gr.Markdown(H1.format(UI_TITLE))
182
 
183
  with gr.Accordion("🔑 Eldritch Key & Oracle Selection", open=True):
@@ -193,11 +211,7 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
193
  with gr.Tabs():
194
  with gr.TabItem("💬 Chat"):
195
  text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
196
- text_event = text_prompt.submit(
197
- fn=lambda api_key, model, prompt, hist: invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], hist),
198
- inputs=[api_key_box, model_selector, text_prompt, chatbot],
199
- outputs=chatbot
200
- )
201
 
202
  with gr.TabItem("🖼️ Image"):
203
  with gr.Row():
@@ -225,20 +239,33 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
225
  doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
226
  doc_btn = gr.Button("📖 Summon Wisdom")
227
  doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  # --- Autosave Event Listeners ---
230
- # Simple text/dropdown inputs
231
- api_key_box.change(update_and_save, [gr.State('api_key'), api_key_box, app_state], app_state)
232
- model_selector.change(update_and_save, [gr.State('model'), model_selector, app_state], app_state)
233
- text_prompt.change(update_and_save, [gr.State('text_prompt'), text_prompt, app_state], app_state)
234
- image_prompt.change(update_and_save, [gr.State('image_prompt'), image_prompt, app_state], app_state)
235
- audio_prompt.change(update_and_save, [gr.State('audio_prompt'), audio_prompt, app_state], app_state)
236
- video_prompt.change(update_and_save, [gr.State('video_prompt'), video_prompt, app_state], app_state)
237
- doc_prompt.change(update_and_save, [gr.State('doc_prompt'), doc_prompt, app_state], app_state)
238
-
239
- # Chatbot history, saved after each interaction
240
  for event in [text_event, image_event, audio_event, video_event, doc_event]:
241
  event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
242
 
243
  if __name__ == "__main__":
244
  demo.launch(share=True, debug=True)
 
 
10
  import re
11
  from io import BytesIO
12
  from PIL import Image
13
+ from pathlib import Path
14
 
15
  # 📜 CONFIG
16
  UI_TITLE = "✨🧙‍♂️🔮 GPT-4o Omni-Oracle"
 
18
  STATE_FILE = "app_state.json"
19
  MODELS = {
20
  "GPT-4o ✨": "gpt-4o",
21
+ "o3 (Advanced Reasoning) �": "gpt-4-turbo", # Placeholder
22
+ "o4-mini (Fastest) ⚡": "gpt-4-turbo", # Placeholder
23
+ "o4-mini-high (Vision) 👁️‍🗨️": "gpt-4o", # Placeholder
24
+ "GPT-4.5 (Research) 🔬": "gpt-4-turbo-preview", # Placeholder
25
+ "GPT-4.1 (Analysis) 💻": "gpt-4-turbo", # Placeholder
26
+ "GPT-4.1-mini (Everyday) ☕": "gpt-4-turbo", # Placeholder
27
  "GPT-4 Turbo 🚀": "gpt-4-turbo",
28
  "GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
29
  }
30
+ VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer"]
31
+ TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"]
32
+ FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"]
33
+ LANGUAGES = {
34
+ "🇬🇧 English": "English", "🇨🇳 Chinese": "Chinese", "🇫🇷 French": "French", "🇩🇪 German": "German",
35
+ "🇮🇱 Hebrew": "Hebrew", "🇮🇳 Hindi": "Hindi", "🇯🇵 Japanese": "Japanese", "🇳🇿 Maori": "Maori",
36
+ "🇷🇺 Russian": "Russian", "🇪🇸 Spanish": "Spanish"
37
+ }
38
+
39
 
40
  # 🎨 STYLE
41
  H1 = "# <font size='7'>{0}</font>"
42
  H2 = "## <font size='6'>{0}</font>"
 
43
 
44
  # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
45
  def save_state(data: dict):
 
54
  try:
55
  return json.load(f)
56
  except json.JSONDecodeError:
57
+ return {}
58
  return {}
59
 
60
  def update_and_save(key: str, value, state: dict):
 
64
  return state
65
 
66
def save_key(k: str) -> str:
    """Persist the supplied OpenAI API key to KEY_FILE on disk.

    Returns a short status string suitable for display in the UI.
    """
    if k is None or not k.strip():
        return "🚫 Empty Key"
    with open(KEY_FILE, "w") as fh:
        fh.write(k.strip())
    return "🔑✅ Key Saved!"
71
 
72
def get_key(k: str) -> str:
    """Resolve the OpenAI API key and register it with the client.

    Priority: explicit argument > key stored in KEY_FILE > OPENAI_KEY env var.

    Raises:
        gr.Error: when no key can be found from any source.
    """
    k = (k or "").strip()
    if not k:
        if os.path.exists(KEY_FILE):
            # Use a context manager: the original `open(KEY_FILE).read()`
            # left the file handle to be closed only by the GC.
            with open(KEY_FILE) as fh:
                k = fh.read().strip()
        else:
            k = os.getenv("OPENAI_KEY", "")
    if not k:
        raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required.")
    o.api_key = k
    return k
78
 
79
def file_to_base64(file_path):
    """Read a file from disk and return its contents as a base64-encoded string."""
    with open(file_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
82
 
83
def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
    """Stream a chat completion from the chosen model, yielding the growing history.

    The user turn is recorded as a "..." placeholder (the real content may be a
    multimodal payload the chat widget cannot render); the assistant turn is
    filled in incrementally as chunks arrive. On failure, a single error turn
    is yielded without mutating the caller's history.
    """
    get_key(scribe_key)
    messages = history + [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    try:
        prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
        history.append({"role": "user", "content": "..."})
        history.append({"role": "assistant", "content": ""})
        for chunk in prophecy:
            delta = chunk.choices[0].delta.content
            if delta:
                history[-1]["content"] += delta
                yield history
    except Exception as e:
        yield history + [{"role": "assistant", "content": f"🧙‍♂️🔮 A magical disturbance occurred: {str(e)}"}]
 
96
 
97
  # --- Modality-Specific Summoning Rituals ---
98
 
99
def summon_vision_from_image(api_key, model, prompt, image_path, history):
    """Send an uploaded image plus a text prompt to a vision-capable model."""
    if image_path is None:
        raise gr.Error("An image must be provided.")
    encoded = file_to_base64(image_path.name)
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded}"}}
    yield from invoke_oracle(
        api_key,
        model,
        "You are an assistant that analyzes images. Respond in Markdown.",
        [text_part, image_part],
        history,
    )
 
 
 
 
104
 
105
def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
    """Transcribe an audio file with Whisper, then ask the model about the transcript."""
    if audio_path is None:
        raise gr.Error("An audio file must be provided.")
    get_key(api_key)
    with open(audio_path.name, "rb") as audio_file:
        transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
    full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
    payload = [{"type": "text", "text": full_prompt}]
    yield from invoke_oracle(
        api_key,
        model,
        "You analyze audio transcripts. Respond in Markdown.",
        payload,
        history,
    )
 
 
112
 
113
  def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
 
114
  if file_path is None: raise gr.Error("A file must be provided.")
115
  text_content = ""
116
  if file_path.name.lower().endswith('.pdf'):
 
119
  else:
120
  with open(file_path.name, 'r', encoding='utf-8') as f:
121
  text_content = f.read()
 
122
  full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
123
+ yield from invoke_oracle(api_key, model, "You analyze documents. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)
 
 
124
 
125
  def summon_chronicle_from_video(api_key, model, prompt, video_path, history, progress=gr.Progress()):
 
126
  if video_path is None: raise gr.Error("A video must be provided.")
127
  get_key(api_key)
128
  base_video_path, _ = os.path.splitext(video_path.name)
 
129
  progress(0.1, desc="🔮 Extracting Audio...")
130
  audio_path = f"{base_video_path}.mp3"
131
  transcript_text = "No audio found."
 
137
  transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
138
  except Exception as e:
139
  print(f"Audio failed: {e}")
 
140
  progress(0.6, desc="🖼️ Sampling Frames...")
141
  base64Frames = []
142
  video = cv2.VideoCapture(video_path.name)
 
150
  _, buffer = cv2.imencode(".jpg", frame)
151
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
152
  video.release()
 
153
  progress(0.8, desc="🌀 Consulting Oracle...")
154
+ user_content = [{"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"}, *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)]
155
+ yield from invoke_oracle(api_key, model, "You are a video analyst. Respond in Markdown.", user_content, history)
156
+
157
def generate_speech(api_key, tts_model, voice, text, language, format, progress=gr.Progress()):
    """Translate *text* to *language* (if not English) and synthesize it to audio.

    Args:
        api_key: OpenAI key (falls back to stored/env key via get_key).
        tts_model: TTS model name (e.g. "tts-1").
        voice: voice preset name.
        text: text to speak.
        language: target language name; "English" skips translation.
        format: audio container/format, also used as the file suffix.
        progress: Gradio progress reporter.

    Returns:
        (audio_file_path, translated_text)

    Raises:
        gr.Error: if translation or speech synthesis fails.
    """
    get_key(api_key)

    # Step 1: Translate the text if the language is not English.
    progress(0.2, desc=f"Translating to {language}...")
    translated_text = text
    if language != "English":
        try:
            response = o.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": f"You are a translator. Translate the following text to {language}. Output only the translated text."},
                    {"role": "user", "content": text},
                ],
                temperature=0,
            )
            translated_text = response.choices[0].message.content
        except Exception as e:
            raise gr.Error(f"Translation failed: {e}")

    # Step 2: Generate speech from the (possibly translated) text.
    progress(0.6, desc="Summoning voice...")
    # Use a unique temp file per call: the original always wrote to the same
    # "speech.<format>" path, so concurrent users clobbered each other's audio.
    import tempfile
    fd, speech_file_path = tempfile.mkstemp(prefix="speech_", suffix=f".{format}")
    os.close(fd)
    try:
        response = o.audio.speech.create(
            model=tts_model,
            voice=voice,
            input=translated_text,
            response_format=format,
        )
        response.stream_to_file(speech_file_path)
    except Exception as e:
        raise gr.Error(f"Speech generation failed: {e}")

    progress(1.0, desc="Voice summoned!")
    return str(speech_file_path), translated_text
194
 
195
  # 🔮 UI
196
  with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
 
197
  initial_state = load_state()
198
  app_state = gr.State(initial_state)
 
199
  gr.Markdown(H1.format(UI_TITLE))
200
 
201
  with gr.Accordion("🔑 Eldritch Key & Oracle Selection", open=True):
 
211
  with gr.Tabs():
212
  with gr.TabItem("💬 Chat"):
213
  text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
214
+ text_event = text_prompt.submit(fn=lambda api_key, model, prompt, hist: invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], hist), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
 
 
 
 
215
 
216
  with gr.TabItem("🖼️ Image"):
217
  with gr.Row():
 
239
  doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
240
  doc_btn = gr.Button("📖 Summon Wisdom")
241
  doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)
242
+
243
+ with gr.TabItem("🔊 Speech Synthesis"):
244
+ gr.Markdown(H2.format("Give Voice to Words"))
245
+ tts_language = gr.Radio(choices=list(LANGUAGES.keys()), label="🈯 Language", value=initial_state.get('tts_language', "🇬🇧 English"))
246
+ with gr.Row():
247
+ tts_voice = gr.Dropdown(choices=VOICES, label="🗣️ Voice", value=initial_state.get('tts_voice', "alloy"))
248
+ tts_model_select = gr.Dropdown(choices=TTS_MODELS, label="🧠 TTS Model", value=initial_state.get('tts_model', "gpt-4o-mini-tts"))
249
+ tts_format = gr.Dropdown(choices=FORMATS, label="📦 Format", value=initial_state.get('tts_format', "mp3"))
250
+ tts_text_input = gr.Textbox(label="📜 Text to Speak", lines=4, placeholder="Enter text here...", value=initial_state.get('tts_text', ''))
251
+ tts_btn = gr.Button("🔊 Generate Speech")
252
+ tts_translated_text = gr.Textbox(label="Translated Text (Output)", interactive=False)
253
+ tts_audio_output = gr.Audio(label="🎧 Spoken Word", type="filepath")
254
+ tts_event = tts_btn.click(generate_speech, [api_key_box, tts_model_select, tts_voice, tts_text_input, tts_language, tts_format], [tts_audio_output, tts_translated_text])
255
 
256
  # --- Autosave Event Listeners ---
257
+ components_to_save = {
258
+ 'api_key': api_key_box, 'model': model_selector, 'text_prompt': text_prompt,
259
+ 'image_prompt': image_prompt, 'audio_prompt': audio_prompt, 'video_prompt': video_prompt,
260
+ 'doc_prompt': doc_prompt, 'tts_language': tts_language, 'tts_voice': tts_voice,
261
+ 'tts_model': tts_model_select, 'tts_format': tts_format, 'tts_text': tts_text_input
262
+ }
263
+ for key, component in components_to_save.items():
264
+ component.change(update_and_save, [gr.State(key), component, app_state], app_state)
265
+
 
266
  for event in [text_event, image_event, audio_event, video_event, doc_event]:
267
  event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
268
 
269
  if __name__ == "__main__":
270
  demo.launch(share=True, debug=True)
271
+