# Hugging Face Space (status: Running)
import gradio as gr | |
import random | |
import time | |
from datetime import datetime | |
import tempfile | |
import os | |
from moviepy.editor import ImageClip, concatenate_videoclips | |
from gradio_client import Client | |
from PIL import Image | |
import edge_tts | |
import asyncio | |
import warnings | |
import numpy as np | |
warnings.filterwarnings('ignore') | |
# Initialize Gradio clients with public demo spaces
def initialize_clients():
    """Connect to the remote Spaces used for image and story generation.

    Each client is created independently so that one unreachable Space does
    not disable the other feature.

    Returns:
        A ``(image_client, arxiv_client)`` tuple. An entry is ``None`` when
        the corresponding Space could not be connected to.
    """

    def _connect(label, space_id):
        # Startup boundary: any failure is reported and turned into ``None``
        # so the app can still start with the remaining features.
        try:
            return Client(space_id)
        except Exception as e:
            print(f"Error initializing {label} client: {str(e)}")
            return None

    # Use a public Stable Diffusion demo space instead of SDXL
    image_client = _connect("image", "gradio/stable-diffusion-2")
    arxiv_client = _connect(
        "story", "awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern"
    )
    return image_client, arxiv_client
if gr.NO_RELOAD:
    # Keep the client setup inside the NO_RELOAD block so the remote
    # connections are not initialized multiple times.
    (IMAGE_CLIENT, ARXIV_CLIENT) = initialize_clients()
# Genres offered as choices in the "Genre" dropdown.
STORY_GENRES = [
    "Science Fiction", "Fantasy", "Mystery", "Romance",
    "Horror", "Adventure", "Historical Fiction", "Comedy",
]

# Maps a story-structure name to the stage outline that is inserted into the
# story prompt.
STORY_STRUCTURES = {
    "Three Act": (
        "Setup (Introduction, Inciting Incident) -> "
        "Confrontation (Rising Action, Climax) -> "
        "Resolution (Falling Action, Conclusion)"
    ),
    "Hero's Journey": (
        "Ordinary World -> Call to Adventure -> Trials -> "
        "Transformation -> Return"
    ),
    "Five Act": (
        "Exposition -> Rising Action -> Climax -> "
        "Falling Action -> Resolution"
    ),
    "Seven Point": (
        "Hook -> Plot Turn 1 -> Pinch Point 1 -> Midpoint -> "
        "Pinch Point 2 -> Plot Turn 2 -> Resolution"
    ),
}
async def generate_speech(text, voice="en-US-AriaNeural"):
    """Generate speech from text using edge-tts.

    Args:
        text: The text to narrate.
        voice: Name of the edge-tts voice to use.

    Returns:
        Path to a temporary ``.mp3`` file holding the narration, or ``None``
        when ``text`` is empty/blank/not a string or synthesis fails.
    """
    if not isinstance(text, str) or not text.strip():
        # Nothing to narrate; skip the service call entirely.
        return None

    tmp_path = None
    try:
        communicate = edge_tts.Communicate(text, voice)
        # Reserve a unique file name, then close the handle before the
        # audio is written to that path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
        return tmp_path
    except Exception as e:
        print(f"Error in text2speech: {str(e)}")
        # delete=False means nobody else will remove the file; do not leave
        # an empty or partial mp3 behind when synthesis failed.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        return None
def generate_story_prompt(base_prompt, genre, structure):
    """Build the expanded story prompt for a concept, genre and structure.

    Args:
        base_prompt: The story concept, quoted verbatim in the prompt.
        genre: Genre name inserted into the first line of the prompt.
        structure: Key into ``STORY_STRUCTURES``; its outline is inserted
            into the prompt.

    Returns:
        The multi-line prompt string.

    Raises:
        KeyError: If ``structure`` is not a key of ``STORY_STRUCTURES``.
    """
    return f"""Create a {genre} story using this concept: '{base_prompt}'
Follow this structure: {STORY_STRUCTURES[structure]}
Include vivid descriptions and sensory details.
Make it engaging and suitable for visualization.
Keep each scene description clear and detailed enough for image generation.
Limit the story to 5-7 key scenes.
"""
def generate_story(prompt, model_choice):
    """Ask the remote story service to write a story for ``prompt``.

    Args:
        prompt: The full prompt sent to the service.
        model_choice: Model identifier forwarded to the service.

    Returns:
        Whatever the ``/ask_llm`` endpoint returns, or a string starting with
        ``"Error"`` when the client is unavailable or the call raises.
    """
    try:
        if ARXIV_CLIENT is not None:
            return ARXIV_CLIENT.predict(
                prompt,
                model_choice,
                True,
                api_name="/ask_llm",
            )
        return "Error: Story generation service is not available."
    except Exception as err:
        return f"Error generating story: {str(err)}"
def generate_image_from_text(text_prompt):
    """Request an image for ``text_prompt`` from the remote image service.

    Returns:
        The value returned by the ``/predict`` endpoint, or ``None`` when the
        image client is unavailable or the call raises (the error is printed).
    """
    try:
        if IMAGE_CLIENT is not None:
            # Updated API endpoint for the public demo
            return IMAGE_CLIENT.predict(text_prompt, api_name="/predict")
    except Exception as err:
        print(f"Error generating image: {str(err)}")
    return None
def create_video_from_images(image_paths, durations):
    """Create a slideshow video from a series of images.

    Args:
        image_paths: Image file paths, in playback order. Paths that do not
            exist on disk are skipped.
        durations: Seconds to show each image, paired with ``image_paths``
            by position.

    Returns:
        Path to a temporary ``.mp4`` file, or ``None`` when there are no
        usable images or rendering fails (the error is printed).
    """
    if not image_paths:
        return None

    clips = []
    final_clip = None
    output_path = None
    try:
        for img_path, dur in zip(image_paths, durations):
            if os.path.exists(img_path):
                clips.append(ImageClip(img_path).set_duration(dur))
        if not clips:
            return None

        final_clip = concatenate_videoclips(clips, method="compose")
        # mkstemp creates the file atomically; tempfile.mktemp only returns
        # a name and is deprecated because another process can claim it.
        fd, output_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)
        final_clip.write_videofile(output_path, fps=24)
        return output_path
    except Exception as e:
        print(f"Error creating video: {str(e)}")
        # Do not leave a partial or empty video file behind.
        if output_path is not None and os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None
    finally:
        # Best-effort release of whatever the clips hold.
        for clip in [final_clip, *clips]:
            if clip is None:
                continue
            try:
                clip.close()
            except Exception:
                pass
def process_story(story_text, num_scenes=5):
    """Break a story into at most ``num_scenes`` scenes for visualization.

    The text is split on ``.``; blank fragments are discarded and the
    remaining sentences are spread as evenly as possible over the scenes, in
    order, so the end of the story is never dropped.

    Args:
        story_text: The full story. Empty or ``None`` yields no scenes.
        num_scenes: Maximum number of scenes. Accepts ints or whole-number
            floats; values below 1 yield no scenes.

    Returns:
        A list of scene strings. Sentences inside a scene are joined with
        ``". "``. Fewer than ``num_scenes`` scenes are returned when the
        story has fewer sentences.
    """
    if not story_text:
        return []

    # Coerce so a float count still works with divmod/range below.
    num_scenes = int(num_scenes)
    if num_scenes < 1:
        return []

    # Strip each fragment so joining does not produce doubled spaces, and
    # drop the empty fragment that follows a trailing period.
    sentences = [part.strip() for part in story_text.split('.')]
    sentences = [sentence for sentence in sentences if sentence]
    if not sentences:
        return []

    scene_count = min(num_scenes, len(sentences))
    base_size, remainder = divmod(len(sentences), scene_count)

    scenes = []
    start = 0
    for index in range(scene_count):
        # The first ``remainder`` scenes take one extra sentence each.
        size = base_size + (1 if index < remainder else 0)
        scenes.append('. '.join(sentences[start:start + size]))
        start += size
    return scenes
def _save_scene_image(image):
    """Return a file path for one generated image, or None if it is unusable."""
    if image is None:
        return None
    if isinstance(image, (str, bytes)):
        # The image service already returned a path-like value.
        return image

    temp_path = None
    try:
        picture = Image.fromarray(image)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
            temp_path = tmp_file.name
            picture.save(tmp_file, format="PNG")
        return temp_path
    except Exception as e:
        print(f"Error saving scene image: {str(e)}")
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None


def story_generator_interface(prompt, genre, structure, model_choice, num_scenes, words_per_scene):
    """Main story generation and multimedia creation function.

    Generates the story first, then treats images, narration and video as
    optional extras: a failure in any of them is printed and that output is
    left empty, instead of discarding the story that was already produced.

    Args:
        prompt: The user's story concept. Blank input is rejected.
        genre: Genre name passed to the prompt builder.
        structure: Story-structure name passed to the prompt builder.
        model_choice: Model identifier passed to the story service.
        num_scenes: Maximum number of scenes to illustrate.
        words_per_scene: Accepted for interface compatibility; currently
            not used by the pipeline.

    Returns:
        ``(story, image_paths, audio_path, video_path)``. When the story
        itself cannot be produced, the first item is an error message and
        the other three are ``None``.
    """
    if not prompt or not str(prompt).strip():
        return "Error: Please enter a story concept.", None, None, None

    try:
        # Generate expanded prompt
        story_prompt = generate_story_prompt(prompt, genre, structure)
        # Generate story
        story = generate_story(story_prompt, model_choice)
        if not isinstance(story, str):
            story = str(story)
        if story.startswith("Error"):
            return story, None, None, None
    except Exception as e:
        error_msg = f"An error occurred: {str(e)}"
        return error_msg, None, None, None

    # Generate images for each scene
    image_paths = []
    try:
        for scene in process_story(story, num_scenes):
            scene_path = _save_scene_image(generate_image_from_text(scene))
            if scene_path is not None:
                image_paths.append(scene_path)
    except Exception as e:
        print(f"Error generating scene images: {str(e)}")

    # Generate speech
    audio_path = None
    try:
        audio_path = asyncio.run(generate_speech(story))
    except Exception as e:
        print(f"Error generating narration: {str(e)}")

    # Create video if we have images
    video_path = None
    if image_paths:
        try:
            scene_durations = [5.0] * len(image_paths)  # 5 seconds per scene
            video_path = create_video_from_images(image_paths, scene_durations)
        except Exception as e:
            print(f"Error creating story video: {str(e)}")

    return story, image_paths, audio_path, video_path
# Create Gradio interface: inputs on top, story text and scene gallery in the
# middle, narration and video at the bottom.
with gr.Blocks(title="AI Story Generator & Visualizer") as demo:
    gr.Markdown("# 🎭 AI Story Generator & Visualizer")

    # Input controls
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Story Concept",
                placeholder="Enter your story idea...",
                lines=3,
            )
            genre_input = gr.Dropdown(
                label="Genre",
                choices=STORY_GENRES,
                value="Fantasy",
            )
            structure_input = gr.Dropdown(
                label="Story Structure",
                choices=list(STORY_STRUCTURES.keys()),
                value="Three Act",
            )
            model_choice = gr.Dropdown(
                label="Model",
                choices=[
                    "mistralai/Mixtral-8x7B-Instruct-v0.1",
                    "mistralai/Mistral-7B-Instruct-v0.2",
                ],
                value="mistralai/Mixtral-8x7B-Instruct-v0.1",
            )
            num_scenes = gr.Slider(
                label="Number of Scenes",
                minimum=3,
                maximum=7,
                value=5,
                step=1,
            )
            words_per_scene = gr.Slider(
                label="Words per Scene",
                minimum=20,
                maximum=100,
                value=50,
                step=10,
            )
            generate_btn = gr.Button("Generate Story & Media")

    # Story text next to the generated scene images
    with gr.Row():
        with gr.Column():
            story_output = gr.Textbox(
                label="Generated Story",
                lines=10,
                # Textbox has no ``readonly`` argument; an output-only box is
                # expressed with ``interactive=False``.
                interactive=False,
            )
        with gr.Column():
            gallery = gr.Gallery(label="Scene Visualizations")

    # Narration and slideshow video
    with gr.Row():
        audio_output = gr.Audio(label="Story Narration")
        video_output = gr.Video(label="Story Video")

    # The handler's four return values map onto the four outputs in order.
    generate_btn.click(
        fn=story_generator_interface,
        inputs=[
            prompt_input,
            genre_input,
            structure_input,
            model_choice,
            num_scenes,
            words_per_scene,
        ],
        outputs=[story_output, gallery, audio_output, video_output],
    )
if __name__ == "__main__":
    # launch() does not take a ``reload`` argument; hot reload is enabled by
    # starting the script through the ``gradio`` command-line tool instead.
    demo.launch()