Spaces:

Eason918
/

Storytelling_Application

Sleeping

File size: 3,092 Bytes

cfb477a
 
 
 
 
0559d83
cfb477a
23967c8
 
bd1669a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
006c053
cfb477a
a8b0d91
cfb477a
006c053
cfb477a
 
 
 
006c053
cfb477a
 
bfa0b31
cfb477a
 
006c053
cfb477a
006c053
 
cfb477a
 
 
006c053
 
239d642
 
 
006c053
cfb477a
 
239d642
cfb477a
 
 
 
 
 
 
 
006c053
 
cfb477a
 
 
 
006c053
cfb477a
f6a48f5
cfb477a
006c053
cfb477a
006c053
cfb477a
 
006c053
cfb477a
 
 
 
 
006c053
cfb477a
 
 
006c053
cfb477a
 
006c053
cfb477a

import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import os
# NOTE(review): this shells out to pip each time the script body executes, and it
# runs after `from transformers import pipeline` above — so it presumably cannot
# change the transformers version already imported by this process; confirm.
# Consider pinning transformers==4.36.2 in the environment's requirements file
# instead — TODO confirm against the deployment setup before removing this line.
os.system("pip install transformers==4.36.2")

# Browser-tab title and icon for the app.
st.set_page_config(
    page_title="AI Storyteller",
    page_icon="📖",
)

# UI styling: the CSS below gives the app view container a cover-sized
# background image and centers h1/h3 headings in white text.
# NOTE(review): the background URL looks like a placeholder — confirm.
page_bg_img = """
<style>
[data-testid="stAppViewContainer"] {
    background-image: url("https://your-background-image-url.com");
    background-size: cover;
}
h1, h3 {
    text-align: center;
    color: white;
}
</style>
"""
# unsafe_allow_html is required so the raw <style> tag is injected, not escaped.
st.markdown(
    page_bg_img,
    unsafe_allow_html=True,
)

# Load Hugging Face models.
# Streamlit re-executes this script on every user interaction, so the pipelines
# are built inside st.cache_resource loaders: each model is constructed once per
# server process and the same object is reused on later reruns.
@st.cache_resource
def _load_image_to_text_model():
    """Build the BLIP image-captioning pipeline (cached across reruns)."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


@st.cache_resource
def _load_story_generator():
    """Build the OPT-350m text-generation pipeline on CPU (cached across reruns)."""
    return pipeline("text-generation", model="facebook/opt-350m", device=-1)


# Module-level names used by img2text() and text2story().
image_to_text_model = _load_image_to_text_model()
story_generator = _load_story_generator()

# Image to Text (generate a caption)
def img2text(image_path):
    """Return the caption generated for the image given by ``image_path``.

    The argument is handed to the module-level ``image_to_text_model``
    pipeline; the ``"generated_text"`` field of its first result is returned.
    """
    first_result = image_to_text_model(image_path)[0]
    return first_result["generated_text"]

# Text to Story (generate a complete story)
def text2story(text):
    """Generate a children's story from a short idea such as an image caption.

    Args:
        text: The idea to base the story on.

    Returns:
        The story text, beginning with "Once upon a time...". The instruction
        sentence sent to the model is not part of the returned story.
    """
    instruction = f"Write a fun and magical children's story based on this idea: {text}.\n\n"
    prompt = instruction + "Once upon a time..."
    outputs = story_generator(
        prompt,
        max_length=300,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        repetition_penalty=1.2,
    )
    story = outputs[0]["generated_text"]

    # By default the text-generation pipeline returns the prompt followed by the
    # continuation. Drop the instruction so it is neither displayed nor read
    # aloud as part of the story; the "Once upon a time..." opening is kept.
    if story.startswith(instruction):
        story = story[len(instruction):]
    return story

# Story to Speech (TTS)
def text2audio_gtts(story_text, filename="story.mp3"):
    """Convert story text to English speech with gTTS and save it to a file.

    At most 500 characters of ``story_text`` are spoken. When the cut would
    land inside a word, the text is trimmed back to the last whitespace so the
    audio does not end mid-word.

    Args:
        story_text: The text to speak.
        filename: Path of the audio file to write; an existing file at this
            path is removed first.

    Returns:
        ``filename``, the path of the saved audio file.
    """
    # Remove any previous output. Attempting the removal directly avoids the
    # check-then-delete race of testing os.path.exists() first.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    # Limit the text length sent to gTTS.
    # NOTE(review): the original comment says long text may crash gTTS — confirm.
    max_chars = 500
    if len(story_text) > max_chars:
        clipped = story_text[:max_chars]
        cut_inside_word = not (
            clipped[-1].isspace() or story_text[max_chars].isspace()
        )
        if cut_inside_word:
            # Drop the trailing partial word, unless the text is one long word.
            pieces = clipped.rsplit(None, 1)
            if len(pieces) == 2:
                clipped = pieces[0]
        story_text = clipped.rstrip()

    # Generate speech and write it to disk.
    tts = gTTS(text=story_text, lang="en")
    tts.save(filename)

    return filename

# Streamlit Web UI: upload an image, then show its caption, a generated story,
# and a spoken version of the story that can be played or downloaded.
st.header("📖 AI Storyteller: Turn Your Image into a Story with Audio")

uploaded_file = st.file_uploader("Upload an Image...", type=["jpg", "png"])

if uploaded_file:

    # Open the upload straight from memory instead of writing it to a fixed
    # path on disk, which concurrent sessions would overwrite for each other.
    uploaded_file.seek(0)
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Streamlit re-executes this script on every widget interaction (including
    # a click on the download button). Results are cached per session, keyed by
    # the upload's content, so a rerun does not re-sample a different story or
    # regenerate the audio for the same image.
    upload_id = hash(uploaded_file.getvalue())
    result = st.session_state.get("story_result")

    if result is None or result["upload_id"] != upload_id:
        # Generate image caption (the captioning pipeline accepts a PIL image).
        with st.spinner("🔍 Generating image caption..."):
            caption = img2text(image)

        # Generate story
        with st.spinner("📝 Generating story..."):
            story = text2story(caption)

        # Generate audio and read it into memory right away, so playback and
        # download do not depend on the shared output file staying unchanged.
        with st.spinner("🔊 Generating audio..."):
            audio_file = text2audio_gtts(story)
            with open(audio_file, "rb") as file:
                audio_bytes = file.read()

        result = {
            "upload_id": upload_id,
            "caption": caption,
            "story": story,
            "audio_bytes": audio_bytes,
        }
        st.session_state["story_result"] = result

    st.write("**Image Description:**", result["caption"])

    st.write("**Generated Story:**")
    st.write(result["story"])

    # Play audio
    st.audio(result["audio_bytes"], format="audio/mp3")

    # Download audio
    st.download_button("📥 Download Audio", result["audio_bytes"], file_name="story.mp3")