Spaces:

awacke1
/

GradioGPTGameGenerator

Runtime error

App Files Files Community

GradioGPTGameGenerator / app.py

awacke1

Update app.py

c381bf2 verified 5 days ago

raw

history blame

14.3 kB

	import os
	import gradio as gr
	import openai as o
	import base64
	import fitz # PyMuPDF
	import cv2
	from moviepy.video.io.VideoFileClip import VideoFileClip
	import json
	import requests
	import re
	from io import BytesIO
	from PIL import Image
	from pathlib import Path

	# 📜 CONFIG
	UI_TITLE = "✨🧙‍♂️🔮 GPT-4o Omni-Oracle"
	KEY_FILE = "key.txt"
	STATE_FILE = "app_state.json"
	MODELS = {
	"GPT-4o ✨": "gpt-4o",
	"o3 (Advanced Reasoning) �": "gpt-4-turbo", # Placeholder
	"o4-mini (Fastest) ⚡": "gpt-4-turbo", # Placeholder
	"o4-mini-high (Vision) 👁️‍🗨️": "gpt-4o", # Placeholder
	"GPT-4.5 (Research) 🔬": "gpt-4-turbo-preview", # Placeholder
	"GPT-4.1 (Analysis) 💻": "gpt-4-turbo", # Placeholder
	"GPT-4.1-mini (Everyday) ☕": "gpt-4-turbo", # Placeholder
	"GPT-4 Turbo 🚀": "gpt-4-turbo",
	"GPT-3.5 Turbo ⚡": "gpt-3.5-turbo",
	}
	VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer"]
	TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"]
	FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"]
	LANGUAGES = {
	"🇬🇧 English": "English", "🇨🇳 Chinese": "Chinese", "🇫🇷 French": "French", "🇩🇪 German": "German",
	"🇮🇱 Hebrew": "Hebrew", "🇮🇳 Hindi": "Hindi", "🇯🇵 Japanese": "Japanese", "🇳🇿 Maori": "Maori",
	"🇷🇺 Russian": "Russian", "🇪🇸 Spanish": "Spanish"
	}


	# 🎨 STYLE
	H1 = "# <font size='7'>{0}</font>"
	H2 = "## <font size='6'>{0}</font>"

	# 🪄 HELPERS, LORE & AUTOSAVE RITUALS
	def save_state(data: dict):
	"""A rune that inscribes the session's memory onto a JSON scroll."""
	with open(STATE_FILE, 'w') as f:
	json.dump(data, f, indent=4)

	def load_state() -> dict:
	"""A ritual to recall the session's memory from the JSON scroll."""
	if os.path.exists(STATE_FILE):
	with open(STATE_FILE, 'r') as f:
	try:
	return json.load(f)
	except json.JSONDecodeError:
	return {}
	return {}

	def update_and_save(key: str, value, state: dict):
	"""A binding spell that updates a memory and immediately inscribes it."""
	state[key] = value
	save_state(state)
	return state

	def save_key(k: str) -> str:
	"💾🔑 A rune to bind the Eldritch Key."
	if not k or not k.strip(): return "🚫 Empty Key"
	with open(KEY_FILE, "w") as f: f.write(k.strip())
	return "🔑✅ Key Saved!"

	def get_key(k: str) -> str:
	"📜🔑 A ritual to summon the Eldritch Key."
	k = k.strip() if k and k.strip() else (open(KEY_FILE).read().strip() if os.path.exists(KEY_FILE) else os.getenv("OPENAI_KEY", ""))
	if not k: raise gr.Error("❗🔑 An Eldritch Key (OpenAI API Key) is required.")
	o.api_key = k
	return k

	def file_to_base64(file_path):
	with open(file_path, "rb") as f:
	return base64.b64encode(f.read()).decode('utf-8')

	def invoke_oracle(scribe_key: str, model_name: str, system_prompt: str, user_content: list, history: list):
	get_key(scribe_key)
	messages = history + [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
	try:
	prophecy = o.chat.completions.create(model=model_name, messages=messages, stream=True)
	history.append({"role": "user", "content": "..."})
	history.append({"role": "assistant", "content": ""})
	for chunk in prophecy:
	if chunk.choices[0].delta.content:
	history[-1]['content'] += chunk.choices[0].delta.content
	yield history
	except Exception as e:
	yield history + [{"role": "assistant", "content": f"🧙‍♂️🔮 A magical disturbance occurred: {str(e)}"}]

	# --- Modality-Specific Summoning Rituals ---

	def summon_vision_from_image(api_key, model, prompt, image_path, history):
	if image_path is None: raise gr.Error("An image must be provided.")
	b64_image = file_to_base64(image_path.name)
	user_content = [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}]
	yield from invoke_oracle(api_key, model, "You are an assistant that analyzes images. Respond in Markdown.", user_content, history)

	def summon_echo_from_audio(api_key, model, prompt, audio_path, history):
	if audio_path is None: raise gr.Error("An audio file must be provided.")
	get_key(api_key)
	with open(audio_path.name, "rb") as audio_file:
	transcription = o.audio.transcriptions.create(model="whisper-1", file=audio_file)
	full_prompt = f"{prompt}\n\n--- Transcription ---\n{transcription.text}"
	yield from invoke_oracle(api_key, model, "You analyze audio transcripts. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)

	def summon_wisdom_from_text(api_key, model, prompt, file_path, history):
	if file_path is None: raise gr.Error("A file must be provided.")
	text_content = ""
	if file_path.name.lower().endswith('.pdf'):
	with fitz.open(file_path.name) as doc:
	text_content = "".join(page.get_text() for page in doc)
	else:
	with open(file_path.name, 'r', encoding='utf-8') as f:
	text_content = f.read()
	full_prompt = f"{prompt}\n\n--- Document Content ---\n{text_content[:10000]}..."
	yield from invoke_oracle(api_key, model, "You analyze documents. Respond in Markdown.", [{"type": "text", "text": full_prompt}], history)

	def summon_chronicle_from_video(api_key, model, prompt, video_path, history, progress=gr.Progress()):
	if video_path is None: raise gr.Error("A video must be provided.")
	get_key(api_key)
	base_video_path, _ = os.path.splitext(video_path.name)
	progress(0.1, desc="🔮 Extracting Audio...")
	audio_path = f"{base_video_path}.mp3"
	transcript_text = "No audio found."
	try:
	with VideoFileClip(video_path.name) as clip:
	clip.audio.write_audiofile(audio_path, bitrate="32k", logger=None)
	progress(0.3, desc="🎤 Transcribing Audio...")
	with open(audio_path, "rb") as audio_file:
	transcript_text = o.audio.transcriptions.create(model="whisper-1", file=audio_file).text
	except Exception as e:
	print(f"Audio failed: {e}")
	progress(0.6, desc="🖼️ Sampling Frames...")
	base64Frames = []
	video = cv2.VideoCapture(video_path.name)
	total_frames, fps = int(video.get(cv2.CAP_PROP_FRAME_COUNT)), video.get(cv2.CAP_PROP_FPS)
	frames_to_skip = int(fps * 2)
	for curr_frame in range(0, total_frames - 1, frames_to_skip):
	if len(base64Frames) >= 10: break
	video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
	success, frame = video.read()
	if not success: break
	_, buffer = cv2.imencode(".jpg", frame)
	base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
	video.release()
	progress(0.8, desc="🌀 Consulting Oracle...")
	user_content = [{"type": "text", "text": f"{prompt}\n\n--- Audio Transcript ---\n{transcript_text}"}, *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)]
	yield from invoke_oracle(api_key, model, "You are a video analyst. Respond in Markdown.", user_content, history)

	def generate_speech(api_key, tts_model, voice, text, language, format, progress=gr.Progress()):
	"""A ritual to give voice to the written word, in any tongue."""
	get_key(api_key)

	# Step 1: Translate the text if the language is not English
	progress(0.2, desc=f"Translating to {language}...")
	translated_text = text
	if language != "English":
	try:
	response = o.chat.completions.create(
	model="gpt-4o",
	messages=[
	{"role": "system", "content": f"You are a translator. Translate the following text to {language}. Output only the translated text."},
	{"role": "user", "content": text}
	],
	temperature=0
	)
	translated_text = response.choices[0].message.content
	except Exception as e:
	raise gr.Error(f"Translation failed: {e}")

	# Step 2: Generate speech from the (possibly translated) text
	progress(0.6, desc="Summoning voice...")
	speech_file_path = Path(__file__).parent / f"speech.{format}"
	try:
	response = o.audio.speech.create(
	model=tts_model,
	voice=voice,
	input=translated_text,
	response_format=format
	)
	response.stream_to_file(speech_file_path)
	except Exception as e:
	raise gr.Error(f"Speech generation failed: {e}")

	progress(1.0, desc="Voice summoned!")
	return str(speech_file_path), translated_text

	# 🔮 UI
	with gr.Blocks(title=UI_TITLE, theme=gr.themes.Soft(primary_hue="red", secondary_hue="orange")) as demo:
	initial_state = load_state()
	app_state = gr.State(initial_state)
	gr.Markdown(H1.format(UI_TITLE))

	with gr.Accordion("🔑 Eldritch Key & Oracle Selection", open=True):
	with gr.Row():
	api_key_box = gr.Textbox(label="🔑 Key", type="password", placeholder="sk-...", scale=3, value=initial_state.get('api_key', ''))
	save_btn = gr.Button("💾", scale=1)
	status_txt = gr.Textbox(interactive=False, scale=1, label="Status")
	model_selector = gr.Dropdown(choices=list(MODELS.keys()), label="🔮 Oracle", value=initial_state.get('model', "GPT-4o ✨"))
	save_btn.click(save_key, inputs=api_key_box, outputs=status_txt)

	chatbot = gr.Chatbot(height=500, label="📜 Scroll of Conversation", type='messages', value=initial_state.get('chatbot', []))

	with gr.Tabs():
	with gr.TabItem("💬 Chat"):
	text_prompt = gr.Textbox(label="Your Quest:", placeholder="Type your message...", value=initial_state.get('text_prompt', ''))
	text_event = text_prompt.submit(fn=lambda api_key, model, prompt, hist: invoke_oracle(api_key, model, "You are a helpful AI assistant.", [{"type": "text", "text": prompt}], hist), inputs=[api_key_box, model_selector, text_prompt, chatbot], outputs=chatbot)

	with gr.TabItem("🖼️ Image"):
	with gr.Row():
	image_input = gr.File(label="Upload Image", type="file")
	image_output = gr.Image(label="Your Image", type="filepath", interactive=False)
	image_prompt = gr.Textbox(label="Image Prompt:", value=initial_state.get('image_prompt', "What is in this image?"))
	image_btn = gr.Button("👁️ Summon Vision")
	image_input.change(lambda x: x, inputs=image_input, outputs=image_output)
	image_event = image_btn.click(summon_vision_from_image, [api_key_box, model_selector, image_prompt, image_input, chatbot], chatbot)

	with gr.TabItem("🎤 Audio"):
	audio_input = gr.File(label="Upload Audio", type="file")
	audio_prompt = gr.Textbox(label="Audio Prompt:", value=initial_state.get('audio_prompt', "Summarize this audio."))
	audio_btn = gr.Button("🗣️ Summon Echo")
	audio_event = audio_btn.click(summon_echo_from_audio, [api_key_box, model_selector, audio_prompt, audio_input, chatbot], chatbot)

	with gr.TabItem("🎥 Video"):
	video_input = gr.File(label="Upload Video", type="file")
	video_prompt = gr.Textbox(label="Video Prompt:", value=initial_state.get('video_prompt', "Summarize this video."))
	video_btn = gr.Button("🎬 Summon Chronicle")
	video_event = video_btn.click(summon_chronicle_from_video, [api_key_box, model_selector, video_prompt, video_input, chatbot], chatbot)

	with gr.TabItem("📄 Document"):
	doc_input = gr.File(label="Upload PDF or TXT", type="file")
	doc_prompt = gr.Textbox(label="Document Prompt:", value=initial_state.get('doc_prompt', "Summarize this document."))
	doc_btn = gr.Button("📖 Summon Wisdom")
	doc_event = doc_btn.click(summon_wisdom_from_text, [api_key_box, model_selector, doc_prompt, doc_input, chatbot], chatbot)

	with gr.TabItem("🔊 Speech Synthesis"):
	gr.Markdown(H2.format("Give Voice to Words"))
	tts_language = gr.Radio(choices=list(LANGUAGES.keys()), label="🈯 Language", value=initial_state.get('tts_language', "🇬🇧 English"))
	with gr.Row():
	tts_voice = gr.Dropdown(choices=VOICES, label="🗣️ Voice", value=initial_state.get('tts_voice', "alloy"))
	tts_model_select = gr.Dropdown(choices=TTS_MODELS, label="🧠 TTS Model", value=initial_state.get('tts_model', "gpt-4o-mini-tts"))
	tts_format = gr.Dropdown(choices=FORMATS, label="📦 Format", value=initial_state.get('tts_format', "mp3"))
	tts_text_input = gr.Textbox(label="📜 Text to Speak", lines=4, placeholder="Enter text here...", value=initial_state.get('tts_text', ''))
	tts_btn = gr.Button("🔊 Generate Speech")
	tts_translated_text = gr.Textbox(label="Translated Text (Output)", interactive=False)
	tts_audio_output = gr.Audio(label="🎧 Spoken Word", type="filepath")
	tts_event = tts_btn.click(generate_speech, [api_key_box, tts_model_select, tts_voice, tts_text_input, tts_language, tts_format], [tts_audio_output, tts_translated_text])

	# --- Autosave Event Listeners ---
	components_to_save = {
	'api_key': api_key_box, 'model': model_selector, 'text_prompt': text_prompt,
	'image_prompt': image_prompt, 'audio_prompt': audio_prompt, 'video_prompt': video_prompt,
	'doc_prompt': doc_prompt, 'tts_language': tts_language, 'tts_voice': tts_voice,
	'tts_model': tts_model_select, 'tts_format': tts_format, 'tts_text': tts_text_input
	}
	for key, component in components_to_save.items():
	component.change(update_and_save, [gr.State(key), component, app_state], app_state)

	for event in [text_event, image_event, audio_event, video_event, doc_event]:
	event.then(lambda history, state: update_and_save('chatbot', history, state), [chatbot, app_state], app_state)

	if __name__ == "__main__":
	demo.launch(share=True, debug=True)