Update app.py

app.py CHANGED
@@ -11,6 +11,8 @@ import re
 from io import BytesIO
 from PIL import Image
 from pathlib import Path
+import numpy as np
+from gradio_webrtc import WebRTC
 
 # 📜 CONFIG
 UI_TITLE = "✨🧙♂️🔮 GPT-4o Omni-Oracle"
@@ -18,7 +20,7 @@ KEY_FILE = "key.txt"
 STATE_FILE = "app_state.json"
 MODELS = {
     "GPT-4o ✨": "gpt-4o",
-    "o3 (Advanced Reasoning)
+    "o3 (Advanced Reasoning) 🧠": "gpt-4-turbo", # Placeholder
     "o4-mini (Fastest) ⚡": "gpt-4-turbo", # Placeholder
     "o4-mini-high (Vision) 👁️🗨️": "gpt-4o", # Placeholder
     "GPT-4.5 (Research) 🔬": "gpt-4-turbo-preview", # Placeholder
@@ -35,11 +37,18 @@ LANGUAGES = {
     "🇮🇱 Hebrew": "Hebrew", "🇮🇳 Hindi": "Hindi", "🇯🇵 Japanese": "Japanese", "🇳🇿 Maori": "Maori",
     "🇷🇺 Russian": "Russian", "🇪🇸 Spanish": "Spanish"
 }
-
+# For WebRTC - Replace with your own if deploying
+RTC_CONFIGURATION = {
+    "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
+}
 
 # 🎨 STYLE
 H1 = "# <font size='7'>{0}</font>"
 H2 = "## <font size='6'>{0}</font>"
+CSS = """
+.my-group {max-width: 500px !important; max-height: 500px !important;}
+.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}
+"""
 
 # 🪄 HELPERS, LORE & AUTOSAVE RITUALS
 def save_state(data: dict):
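The RTC_CONFIGURATION added above lists only a public Google STUN server, and its own comment asks deployers to substitute their servers. A minimal sketch of that substitution, assuming a self-hosted TURN server (the host and credentials below are placeholders, not part of this commit):

# Illustrative only - placeholder TURN server and credentials, not from this commit.
RTC_CONFIGURATION = {
    "iceServers": [
        {"urls": ["stun:stun.l.google.com:19302"]},
        {
            "urls": ["turn:turn.example.com:3478"],  # assumed self-hosted TURN server
            "username": "example-user",              # assumed credential
            "credential": "example-pass",
        },
    ]
}

The WebRTC component added later in this commit takes the dictionary unchanged through its rtc_configuration argument, so only the constant itself would change.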
@@ -76,124 +85,78 @@ def get_key(k: str) -> str:
     o.api_key = k
     return k
 
-
-
-
+# --- Image & Audio Streaming Functions ---
+
+def transform_cv2(frame: np.ndarray, transform: str):
+    """Applies a magical filter to a single frame from a webcam stream."""
+    if transform == "cartoon":
+        img_color = cv2.pyrDown(cv2.pyrDown(frame))
+        for _ in range(6):
+            img_color = cv2.bilateralFilter(img_color, 9, 9, 7)
+        img_color = cv2.pyrUp(cv2.pyrUp(img_color))
+        img_edges = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
+        img_edges = cv2.adaptiveThreshold(
+            cv2.medianBlur(img_edges, 7), 255, cv2.ADAPTIVE_THRESH_MEAN_C,
+            cv2.THRESH_BINARY, 9, 2)
+        img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)
+        return cv2.bitwise_and(img_color, img_edges)
+    elif transform == "edges":
+        return cv2.cvtColor(cv2.Canny(frame, 100, 200), cv2.COLOR_GRAY2BGR)
+    elif transform == "flip":
+        return np.flipud(frame)
+    return frame
+
+def transcribe_streaming(audio_chunk, history_state):
+    """Transcribes a chunk of audio, keeping context from previous chunks."""
+    if audio_chunk is None:
+        return history_state, ""
+
+    # In a real scenario, you would use a streaming-capable ASR model.
+    # Here, we simulate it by transcribing each chunk individually.
+    # This is a placeholder for a more complex implementation.
+    get_key(os.getenv("OPENAI_KEY", "")) # Ensure API key is set
+
+    # Save chunk to a temporary file to use with OpenAI API
+    temp_wav_path = "temp_chunk.wav"
+    sample_rate, data = audio_chunk
+    import soundfile as sf
+    sf.write(temp_wav_path, data, sample_rate)
 
-def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
-    get_key(scribe_key)
-    messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
     try:
-
-
-
-        for chunk in prophecy:
-            if chunk.choices[0].delta.content:
-                history[-1]['content'] += chunk.choices[0].delta.content
-                yield history
+        with open(temp_wav_path, "rb") as audio_file:
+            transcript = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
+        new_text = transcript.text
     except Exception as e:
-
-
-# --- Modality-Specific Summoning Rituals ---
-
-def summon_vision_from_image(api_key, model, prompt, image_path, history):
-    if image_path is None: raise gr.Error("An image must be provided.")
-    b64_image = file_to_base64(image_path.name)
-    user_content = [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}]
-    yield from invoke_oracle(api_key, model, "You are an assistant that analyzes images. Respond in Markdown.", user_content, history)
-
-def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
-    if audio_path is None: raise gr.Error("An audio file must be provided.")
-    get_key(api_key)
-    with open(audio_path.name, "rb") as audio_file:
-        transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
-    full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
-    yield from invoke_oracle(api_key, model, "You analyze audio transcripts. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)
-
-def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
-    if file_path is None: raise gr.Error("A file must be provided.")
-    text_content = ""
-    if file_path.name.lower().endswith('.pdf'):
-        with fitz.open(file_path.name) as doc:
-            text_content = "".join(page.get_text() for page in doc)
-    else:
-        with open(file_path.name, 'r', encoding='utf-8') as f:
-            text_content = f.read()
-    full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
-    yield from invoke_oracle(api_key, model, "You analyze documents. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)
+        print(f"Transcription error: {e}")
+        new_text = "(...)"
 
-
-
-    get_key(api_key)
-    base_video_path, _ = os.path.splitext(video_path.name)
-    progress(0.1, desc="🔮 Extracting Audio...")
-    audio_path = f"{base_video_path}.mp3"
-    transcript_text = "No audio found."
-    try:
-        with VideoFileClip(video_path.name) as clip:
-            clip.audio.write_audiofile(audio_path, bitrate="32k", logger=None)
-        progress(0.3, desc="🎤 Transcribing Audio...")
-        with open(audio_path, "rb") as audio_file:
-            transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
-    except Exception as e:
-        print(f"Audio failed: {e}")
-    progress(0.6, desc="🖼️ Sampling Frames...")
-    base64Frames = []
-    video = cv2.VideoCapture(video_path.name)
-    total_frames, fps = int(video.get(cv2.CAP_PROP_FRAME_COUNT)), video.get(cv2.CAP_PROP_FPS)
-    frames_to_skip = int(fps * 2)
-    for curr_frame in range(0, total_frames - 1, frames_to_skip):
-        if len(base64Frames) >= 10: break
-        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
-        success, frame = video.read()
-        if not success: break
-        _, buffer = cv2.imencode(".jpg", frame)
-        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
-    video.release()
-    progress(0.8, desc="🌀 Consulting Oracle...")
-    user_content = [{"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"}, *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)]
-    yield from invoke_oracle(api_key, model, "You are a video analyst. Respond in Markdown.", user_content, history)
+    history_state += new_text + " "
+    return history_state, history_state
 
-
-
+# --- Other Functions (TTS, etc.) ---
+def generate_speech(api_key, tts_model, voice, text, language_key, format, progress=gr.Progress()):
     get_key(api_key)
-
-    # Step 1: Translate the text if the language is not English
+    language = LANGUAGES.get(language_key, "English")
     progress(0.2, desc=f"Translating to {language}...")
     translated_text = text
     if language != "English":
         try:
-            response = o.chat.completions.create(
-                model="gpt-4o",
-                messages=[
-                    {"role": "system", "content": f"You are a translator. Translate the following text to {language}. Output only the translated text."},
-                    {"role": "user", "content": text}
-                ],
-                temperature=0
-            )
+            response = o.chat.completions.create(model="gpt-4o", messages=[{"role": "system", "content": f"Translate to {language}. Output only the translation."}, {"role": "user", "content": text}], temperature=0)
             translated_text = response.choices[0].message.content
         except Exception as e:
             raise gr.Error(f"Translation failed: {e}")
-
-    # Step 2: Generate speech from the (possibly translated) text
     progress(0.6, desc="Summoning voice...")
     speech_file_path = Path(__file__).parent / f"speech.{format}"
     try:
-        response = o.audio.speech.create(
-            model=tts_model,
-            voice=voice,
-            input=translated_text,
-            response_format=format
-        )
+        response = o.audio.speech.create(model=tts_model, voice=voice, input=translated_text, response_format=format)
         response.stream_to_file(speech_file_path)
     except Exception as e:
         raise gr.Error(f"Speech generation failed: {e}")
-
    progress(1.0, desc="Voice summoned!")
    return str(speech_file_path), translated_text
 
 # 🔮 UI
-with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
+with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange"), css=CSS) as demo:
     initial_state = load_state()
     app_state = gr.State(initial_state)
     gr.Markdown(H1.format(UI_TITLE))
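The new transcribe_streaming transcribes each microphone chunk in isolation, which its own comments flag as a stand-in for a real streaming ASR setup; words split across chunk boundaries can be lost. A hedged sketch of one refinement under the same whisper-1 endpoint, accumulating samples in the gr.State value and re-transcribing the rolling buffer (the transcribe_buffered name, buffer layout, and file name are illustrative, not part of this commit):

# Illustrative sketch only - reuses the module-level OpenAI alias `o` from app.py.
import numpy as np
import soundfile as sf

def transcribe_buffered(audio_chunk, buffer_state):
    """buffer_state holds (samples, sample_rate); returns (new_state, transcript)."""
    if audio_chunk is None:
        return buffer_state, ""
    sample_rate, data = audio_chunk
    # Append the new chunk to everything heard so far.
    samples = data if not buffer_state else np.concatenate([buffer_state[0], data])
    sf.write("rolling_buffer.wav", samples, sample_rate)  # rewrite the growing buffer
    with open("rolling_buffer.wav", "rb") as f:
        text = o.audio.transcriptions.create(model="whisper-1", file=f).text
    return (samples, sample_rate), text  # transcript of the full buffer so far

The trade-off is that the whole buffer is re-uploaded on every tick, so latency and bandwidth grow with the recording; a genuinely streaming ASR backend would avoid that.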
@@ -206,40 +169,40 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
         model_selector = gr.Dropdown(choices=list(MODELS.keys()), label="🔮 Oracle", value=initial_state.get('model', "GPT-4o ✨"))
         save_btn.click(save_key, inputs=api_key_box, outputs=status_txt)
 
-    chatbot = gr.Chatbot(height=
+    chatbot = gr.Chatbot(height=400, label="📜 Scroll of Conversation", type='messages', value=initial_state.get('chatbot', []))
 
     with gr.Tabs():
         with gr.TabItem("💬 Chat"):
             text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
-
-
-
+            # This is a simplified invoke_oracle for text-only chat
+            text_event = text_prompt.submit(fn=lambda k, m, p, h: invoke_oracle(k, m, "You are a helpful AI.", [{"type": "text", "text": p}], h), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)
+
+        with gr.TabItem("🖼️ Streaming Image"):
+            gr.Markdown(H2.format("Live Image Enchantments"))
+            with gr.Column(elem_classes=["my-column"]):
+                with gr.Group(elem_classes=["my-group"]):
+                    transform_filter = gr.Dropdown(choices=["cartoon", "edges", "flip"], value="flip", label="Transformation")
+                    streaming_image = gr.Image(sources=["webcam"], type="numpy", streaming=True)
+            streaming_image.stream(transform_cv2, [streaming_image, transform_filter], streaming_image, time_limit=30, stream_every=0.1)
+
+        with gr.TabItem("🎤 Streaming Audio"):
+            gr.Markdown(H2.format("Real-time Transcription Rite"))
             with gr.Row():
-
-
-
-
-
-
+                mic_input = gr.Audio(sources="microphone", streaming=True)
+                transcript_output = gr.Textbox(label="Transcript", interactive=False)
+            transcript_state = gr.State(value="")
+            mic_input.stream(transcribe_streaming, [mic_input, transcript_state], [transcript_state, transcript_output], time_limit=20, stream_every=1)
+
+        with gr.TabItem("👁️ Object Detection"):
+            gr.Markdown(H2.format("Live Scrying with YOLOv10"))
+            gr.HTML("<h3 style='text-align: center'>Requires a separate inference server for YOLOv10. This is a UI placeholder.</h3>")
+            with gr.Column(elem_classes=["my-column"]):
+                with gr.Group(elem_classes=["my-group"]):
+                    webrtc_stream = WebRTC(label="Stream", rtc_configuration=RTC_CONFIGURATION)
+                    conf_threshold = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, step=0.05, value=0.30)
+            # Placeholder for the actual stream event handler which would call the YOLOv10 model
+            # webrtc_stream.stream(fn=detection_placeholder, inputs=[webrtc_stream, conf_threshold], outputs=[webrtc_stream], time_limit=10)
 
-        with gr.TabItem("🎤 Audio"):
-            audio_input = gr.File(label="Upload Audio", type="file")
-            audio_prompt = gr.Textbox(label="Audio Prompt:", value=initial_state.get('audio_prompt', "Summarize this audio."))
-            audio_btn = gr.Button("🗣️ Summon Echo")
-            audio_event = audio_btn.click(summon_echo_from_audio, [api_key_box, model_selector, audio_prompt, audio_input, chatbot], chatbot)
-
-        with gr.TabItem("🎥 Video"):
-            video_input = gr.File(label="Upload Video", type="file")
-            video_prompt = gr.Textbox(label="Video Prompt:", value=initial_state.get('video_prompt', "Summarize this video."))
-            video_btn = gr.Button("🎬 Summon Chronicle")
-            video_event = video_btn.click(summon_chronicle_from_video, [api_key_box, model_selector, video_prompt, video_input, chatbot], chatbot)
-
-        with gr.TabItem("📄 Document"):
-            doc_input = gr.File(label="Upload PDF or TXT", type="file")
-            doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
-            doc_btn = gr.Button("📖 Summon Wisdom")
-            doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)
-
         with gr.TabItem("🔊 Speech Synthesis"):
             gr.Markdown(H2.format("Give Voice to Words"))
             tts_language = gr.Radio(choices=list(LANGUAGES.keys()), label="🈯 Language", value=initial_state.get('tts_language', "🇬🇧 English"))
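The Object Detection tab ships only a pass-through detection_placeholder (defined under the __main__ guard at the end of the diff) and keeps the webrtc_stream.stream(...) wiring commented out, since no YOLOv10 weights are loaded in this Space. For orientation, a hedged sketch of the handler shape that wiring expects; the run_yolov10 call is hypothetical and not part of this commit, and the sketch reuses the cv2 import already in app.py:

# Illustrative only - run_yolov10 is hypothetical; only the frame-in / frame-out shape is implied by the commit.
def detect_objects(frame, conf_threshold):
    """Takes a webcam frame (numpy array) and the slider value, returns a frame to display."""
    # detections = run_yolov10(frame, conf=conf_threshold)  # hypothetical inference call
    # for x1, y1, x2, y2, label, score in detections:
    #     cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(frame, f"conf >= {conf_threshold:.2f}", (10, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    return frame

With such a handler in place, the commented-out wiring would become webrtc_stream.stream(fn=detect_objects, inputs=[webrtc_stream, conf_threshold], outputs=[webrtc_stream], time_limit=10).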
@@ -256,15 +219,15 @@ with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary
     # --- Autosave Event Listeners ---
     components_to_save = {
         'api_key': api_key_box, 'model': model_selector, 'text_prompt': text_prompt,
-        '
-        'doc_prompt': doc_prompt, 'tts_language': tts_language, 'tts_voice': tts_voice,
+        'tts_language': tts_language, 'tts_voice': tts_voice,
         'tts_model': tts_model_select, 'tts_format': tts_format, 'tts_text': tts_text_input
     }
     for key, component in components_to_save.items():
         component.change(update_and_save, [gr.State(key), component, app_state], app_state)
-
-    for event in [text_event, image_event, audio_event, video_event, doc_event]:
-        event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
+    text_event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)
 
 if __name__ == "__main__":
-
+    # A placeholder function for the YOLOv10 detection since we don't have the model loaded here.
+    def detection_placeholder(image, conf):
+        return image # Just return the image as is.
+    demo.launch(share=True, debug=True)