Spaces:

mayf
/

1

Sleeping

App Files Files Community

1 / app.py

mayf

Update app.py

fd1d947 verified 17 days ago

raw

history blame

3.77 kB

	import os
	import time
	import streamlit as st
	from PIL import Image
	from io import BytesIO
	from transformers import pipeline
	from huggingface_hub import login
	from gtts import gTTS
	import tempfile

	# —––––––– Requirements —–––––––
	# pip install streamlit pillow gTTS transformers huggingface_hub

	# —––––––– Page Config —–––––––
	st.set_page_config(page_title="Magic Story Generator (Local Pipeline)", layout="centered")
	st.title("📖✨ Turn Images into Children's Stories")

	# —––––––– Load Clients & Pipelines (cached) —–––––––
	@st.cache_resource(show_spinner=False)
	def load_clients():
	# Authenticate to pull private or remote-code models if needed
	hf_token = st.secrets.get("HF_TOKEN")
	if hf_token:
	os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
	login(hf_token)

	# 1) Image-captioning pipeline (BLIP)
	captioner = pipeline(
	task="image-to-text",
	model="Salesforce/blip-image-captioning-base",
	device=-1 # CPU; change to 0 for GPU
	)

	# 2) Story-generation pipeline (DeepSeek-R1-Distill-Qwen)
	storyteller = pipeline(
	task="text-generation",
	model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
	tokenizer="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
	trust_remote_code=True,
	device=-1, # CPU; set 0+ for GPU
	temperature=0.6,
	top_p=0.9,
	repetition_penalty=1.1,
	no_repeat_ngram_size=2,
	max_new_tokens=120,
	return_full_text=False
	)

	return captioner, storyteller

	captioner, storyteller = load_clients()

	# —––––––– Helpers —–––––––
	def generate_caption(img: Image.Image) -> str:
	# Use the BLIP pipeline to generate a caption
	result = captioner(img)
	if isinstance(result, list) and result:
	return result[0].get("generated_text", "").strip()
	return ""


	def generate_story(caption: str) -> str:
	# Build a simple prompt incorporating the caption
	prompt = (
	f"Image description: {caption}\n"
	"Write a coherent 50-100 word children's story that flows naturally."
	)

	t0 = time.time()
	outputs = storyteller(
	prompt
	)
	gen_time = time.time() - t0
	st.text(f"⏱ Generated in {gen_time:.1f}s")

	story = outputs[0].get("generated_text", "").strip()
	# Truncate to 100 words
	words = story.split()
	if len(words) > 100:
	story = " ".join(words[:100]) + ('.' if not story.endswith('.') else '')
	return story

	# —––––––– Main App —–––––––
	uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
	if uploaded:
	img = Image.open(uploaded).convert("RGB")
	if max(img.size) > 2048:
	img.thumbnail((2048, 2048))
	st.image(img, use_container_width=True)

	with st.spinner("🔍 Generating caption..."):
	caption = generate_caption(img)
	if not caption:
	st.error("😢 Couldn't understand this image. Try another one!")
	st.stop()
	st.success(f"Caption: {caption}")

	with st.spinner("📝 Writing story..."):
	story = generate_story(caption)

	st.subheader("📚 Your Magical Story")
	st.write(story)

	with st.spinner("🔊 Converting to audio..."):
	try:
	tts = gTTS(text=story, lang="en", slow=False)
	with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
	tts.save(fp.name)
	st.audio(fp.name, format="audio/mp3")
	except Exception as e:
	st.warning(f"⚠️ TTS failed: {e}")

	# Footer
	st.markdown("---\nMade with ❤️ by your friendly story wizard")