|
import io

import streamlit as st
import torch
from gtts import gTTS
from PIL import Image
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
# Configure the browser tab title/icon and use the centered page layout.
st.set_page_config(
    page_title="Magic Story Generator",
    page_icon="📖",
    layout="centered",
)
|
|
|
|
|
@st.cache_resource
def load_models():
    """Load the image-captioning pipeline and the story-writing language model.

    The function is wrapped in ``st.cache_resource`` so the (expensive) model
    objects are built once and then served from Streamlit's resource cache.

    Returns:
        tuple: ``(caption_model, story_model, story_tokenizer)`` where
        ``caption_model`` is the BLIP ``image-to-text`` pipeline,
        ``story_model`` is the Qwen3 causal language model, and
        ``story_tokenizer`` is the tokenizer matching ``story_model``.
    """
    story_model_id = "Qwen/Qwen3-1.7B"

    caption_model = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )

    # Half precision is only worthwhile on a GPU; on a CPU-only host fall back
    # to float32 so generation does not depend on CPU half-precision kernels.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    # NOTE: trust_remote_code is intentionally not enabled. Qwen3 is supported
    # natively by transformers, so there is no need to allow code downloaded
    # from the model repository to execute locally.
    story_model = AutoModelForCausalLM.from_pretrained(
        story_model_id,
        device_map="auto",
        torch_dtype=dtype,
    )
    story_tokenizer = AutoTokenizer.from_pretrained(story_model_id)

    return caption_model, story_model, story_tokenizer
|
|
|
|
|
# Build the models, or fetch them from the cache kept by load_models().
caption_pipe, story_model, story_tokenizer = load_models()

st.title("📖 Instant Story Generator")
uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])

if uploaded_image:
    # Normalise to RGB so palette/alpha images are handled uniformly.
    img = Image.open(uploaded_image).convert("RGB")
    # NOTE(review): newer Streamlit releases prefer `use_container_width`;
    # kept as-is to stay compatible with the installed version — confirm.
    st.image(img, caption="Your Image", use_column_width=True)

    # Step 1: describe the picture in one sentence.
    caption = caption_pipe(img)[0]["generated_text"]

    # Step 2: turn the caption into a story request. The request is sent as a
    # user turn so the chat template produces a normal prompt/answer exchange.
    messages = [
        {
            "role": "user",
            "content": f"Create a 50 to 100 words children's story based on: {caption}.",
        }
    ]

    # add_generation_prompt=True appends the assistant header so the model
    # starts writing its reply; enable_thinking=False asks the Qwen3 template
    # to skip the reasoning preamble so the token budget goes to the story.
    # return_dict=True also yields the attention mask for generate().
    inputs = story_tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        enable_thinking=False,
        return_dict=True,
        return_tensors="pt",
    ).to(story_model.device)

    outputs = story_model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
    )

    # generate() returns prompt + continuation; keep only the new tokens.
    prompt_length = inputs["input_ids"].shape[-1]
    story = story_tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()

    st.subheader("Generated Story")
    st.write(story)

    # Step 3: read the story aloud. The MP3 is synthesised into memory, so no
    # temporary file is left behind on disk.
    if story:
        audio_buffer = io.BytesIO()
        gTTS(text=story, lang="en").write_to_fp(audio_buffer)
        st.audio(audio_buffer.getvalue(), format="audio/mp3")
    else:
        # gTTS refuses empty text; tell the user instead of crashing.
        st.warning("The model returned an empty story, so there is nothing to read aloud.")
|
|