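# Requirements (assumed pip install names, matching the imports below): gradio, openai, pillow, gTTS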
import gradio as gr
from openai import OpenAI
from PIL import Image
import base64
import io
from gtts import gTTS


def pil_to_base64(image, max_size=1024):
    """
    Convert a PIL image to a base64-encoded JPEG string, resizing if necessary.
    """
    if max(image.size) > max_size:
        image.thumbnail((max_size, max_size))
    # JPEG has no alpha channel, so convert RGBA/palette uploads before saving.
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()


def generate_initial_story(image, genre, api_key):
    """
    Generate the initial travel story based on the image and genre.
    """
    if not image or not genre or not api_key:
        return "Please provide all inputs.", []
    image_base64 = pil_to_base64(image)
    client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
    prompt = f"Generate a {genre} story based on this travel photo."
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
            ],
        }
    ]
    try:
        completion = client.chat.completions.create(
            model="meta-llama/llama-3.2-11b-vision-instruct:free",
            messages=messages,
        )
        story = completion.choices[0].message.content
        messages.append({"role": "assistant", "content": story})
        return story, messages
    except Exception as e:
        return f"Error: {str(e)}", []


def generate_continuation(continuation_prompt, messages, api_key):
    """
    Generate a continuation of the story based on the provided prompt.
    """
    if not continuation_prompt or not messages or not api_key:
        return "Please generate a story first, then provide a continuation prompt and your API key.", messages
    client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
    new_user_message = {"role": "user", "content": continuation_prompt}
    updated_messages = messages + [new_user_message]
    try:
        completion = client.chat.completions.create(
            model="meta-llama/llama-3.2-11b-vision-instruct:free",
            messages=updated_messages,
        )
        continuation = completion.choices[0].message.content
        updated_messages.append({"role": "assistant", "content": continuation})
        # Stitch together every assistant turn so the full story is displayed.
        full_story = "\n\n".join([msg["content"] for msg in updated_messages if msg["role"] == "assistant"])
        return full_story, updated_messages
    except Exception as e:
        return f"Error: {str(e)}", messages


def generate_audio(story):
    """
    Generate an audio file from the story text using gTTS.
    """
    if not story:
        return None
    tts = gTTS(text=story, lang='en')
    audio_file = "story.mp3"
    tts.save(audio_file)
    return audio_file


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Interactive Travel Story Generator")
    gr.Markdown("Upload a travel photo, select a genre, and provide your OpenRouter API key to generate a personalized travel story.")
    gr.Markdown("After generating a story, enter a prompt below to continue it, or click 'Generate Audio' to hear it!")
    gr.Markdown("Note: You need an OpenRouter API key from [OpenRouter](https://openrouter.ai/).")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Travel Photo")
        genre_input = gr.Textbox(label="Story Genre (e.g., adventure, romance, mystery)")
        api_key_input = gr.Textbox(label="OpenRouter API Key", type="password")

    generate_button = gr.Button("Generate Story")
    story_output = gr.Textbox(label="Generated Story", lines=10)

    with gr.Row():
        tts_button = gr.Button("Generate Audio")
        audio_output = gr.Audio(label="Story Audio")

    continuation_prompt = gr.Textbox(label="Continuation Prompt (e.g., 'Now, the hero finds a mysterious map.')")
    continue_button = gr.Button("Continue Story")

    # Holds the running chat history between button clicks.
    message_state = gr.State([])

    def on_generate_story(image, genre, api_key):
        story, messages = generate_initial_story(image, genre, api_key)
        return story, messages

    generate_button.click(
        fn=on_generate_story,
        inputs=[image_input, genre_input, api_key_input],
        outputs=[story_output, message_state]
    )

    def on_generate_continuation(continuation_prompt, message_state, api_key):
        full_story, updated_messages = generate_continuation(continuation_prompt, message_state, api_key)
        return full_story, updated_messages

    continue_button.click(
        fn=on_generate_continuation,
        inputs=[continuation_prompt, message_state, api_key_input],
        outputs=[story_output, message_state]
    )

    def on_generate_audio(story):
        audio_file = generate_audio(story)
        return audio_file

    tts_button.click(
        fn=on_generate_audio,
        inputs=story_output,
        outputs=audio_output
    )

demo.launch()