Spaces:

Eason918
/

Storytelling_Application

Sleeping

File size: 2,419 Bytes

cfb477a
 
 
 
 
0559d83
cfb477a
ee966b7
cfb477a
 
 
239d642
cfb477a
 
 
 
 
 
 
f6a48f5
cfb477a
 
239d642
cfb477a
239d642
cfb477a
 
 
239d642
 
 
 
 
cfb477a
 
239d642
cfb477a
 
 
 
 
 
 
 
 
239d642
cfb477a
 
 
 
239d642
cfb477a
f6a48f5
cfb477a
239d642
cfb477a
239d642
cfb477a
 
239d642
cfb477a
 
 
 
 
239d642
cfb477a
 
 
239d642
cfb477a
 
239d642
cfb477a

import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import os
# Shell out to pip to install transformers 4.36.2 each time this script runs.
# NOTE(review): `transformers` has already been imported above, so this install
# presumably cannot change the module loaded in the current process, and it
# adds a pip invocation to every script run. Consider pinning the version in
# the deployment's dependency file instead -- confirm before removing.
os.system("pip install transformers==4.36.2")

# Load the Hugging Face models.
@st.cache_resource(show_spinner=False)
def _load_pipelines():
    """Build the captioning and story-generation pipelines.

    Streamlit re-executes this script on every user interaction; wrapping the
    construction in ``st.cache_resource`` lets reruns reuse the same pipeline
    objects instead of building them again. The spinner is disabled so that
    no UI element is emitted before ``st.set_page_config`` is called later in
    the script.

    Returns:
        A ``(image_to_text_model, story_generator)`` tuple of pipelines.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    generator = pipeline("text-generation", model="facebook/opt-1.3b")
    return captioner, generator


# Module-level names used by img2text() and text2story().
image_to_text_model, story_generator = _load_pipelines()

# Image -> text (generate a caption)
def img2text(image_path):
    """Return the caption the image-to-text model generates for ``image_path``.

    The model is called with ``image_path`` and the ``"generated_text"`` field
    of its first result is returned.
    """
    predictions = image_to_text_model(image_path)
    first_prediction = predictions[0]
    return first_prediction["generated_text"]

# Text -> story (generate a full story)
def text2story(text):
    """Generate a children's story from a short idea such as an image caption.

    The model is prompted with an instruction sentence followed by the story
    opening "Once upon a time...". The text-generation pipeline returns the
    prompt together with its continuation, so the instruction sentence is
    stripped from the result; otherwise it would be displayed and read aloud
    as if it were part of the story.

    Args:
        text: The idea the story should be based on.

    Returns:
        The generated story, starting at the "Once upon a time..." opening.
    """
    instruction = f"Write a fun and magical children's story based on this idea: {text}.\n\n"
    opening = "Once upon a time..."

    generated = story_generator(
        instruction + opening,
        max_length=250,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        repetition_penalty=1.2,
    )[0]["generated_text"]

    # Only strip when the echo is really there, so a pipeline configured to
    # return just the continuation is passed through unchanged.
    if generated.startswith(instruction):
        generated = generated[len(instruction):]
    return generated

# Story -> speech (TTS)
def text2audio_gtts(story_text, filename="story.mp3", max_chars=500):
    """Synthesize ``story_text`` to an MP3 file with gTTS.

    Args:
        story_text: The text to read aloud.
        filename: Path of the MP3 file to write; an existing file is replaced.
        max_chars: Maximum number of characters sent to gTTS. Longer text is
            shortened, preferring a cut at the last whitespace before the
            limit so that the audio does not end in the middle of a word.

    Returns:
        ``filename``, the path of the written audio file.
    """
    # Avoid file conflicts: drop any previous output. Attempting the removal
    # directly avoids the check-then-delete race of an exists() test.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    # Limit the TTS text length (gTTS may not support overly long text).
    if len(story_text) > max_chars:
        clipped = story_text[:max_chars]
        # If the cut falls inside a word, back off to the previous whitespace.
        if not story_text[max_chars].isspace():
            boundary = max(clipped.rfind(" "), clipped.rfind("\n"))
            if boundary > 0:
                clipped = clipped[:boundary]
        story_text = clipped

    # Generate the speech.
    tts = gTTS(text=story_text, lang="en")
    tts.save(filename)

    return filename

# Streamlit Web UI
#
# Streamlit re-executes this script on every widget interaction, including a
# click on the download button. The caption, story and audio for the current
# upload are therefore kept in st.session_state so that a rerun shows the same
# results instead of sampling a new story and overwriting the audio.
st.set_page_config(page_title="AI Storyteller", page_icon="📖")
st.header("📖 AI Storyteller: Turn Your Image into a Story with Audio")

uploaded_file = st.file_uploader("Upload an Image...", type=["jpg", "png"])

if uploaded_file:
    # Save the image to local disk so it can be passed on as a file path.
    image_path = "uploaded_image.jpg"
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Read and display the image.
    image = Image.open(image_path)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Identify the upload by name and content so that results are reused only
    # for the very same image within this session.
    upload_id = (uploaded_file.name, hash(uploaded_file.getvalue()))
    result = st.session_state.get("story_result")
    is_cached = result is not None and result.get("upload_id") == upload_id
    if not is_cached:
        result = {"upload_id": upload_id}

    # Generate the image caption.
    st.text("🔍 Generating image caption...")
    if not is_cached:
        result["caption"] = img2text(image_path)  # expects a file path
    st.write("**Image Description:**", result["caption"])

    # Generate the story.
    st.text("📝 Generating story...")
    if not is_cached:
        result["story"] = text2story(result["caption"])
    st.write("**Generated Story:**")
    st.write(result["story"])

    # Generate the audio.
    st.text("🔊 Generating audio...")
    if not is_cached:
        audio_file = text2audio_gtts(result["story"])
        # Keep the bytes rather than the path: the fixed output file name is
        # overwritten by the next generation.
        with open(audio_file, "rb") as file:
            result["audio"] = file.read()
        # Store only once every step has succeeded, so a failed run is retried.
        st.session_state["story_result"] = result

    # Play the audio.
    st.audio(result["audio"], format="audio/mp3")

    # Download button.
    st.download_button("📥 Download Audio", result["audio"], file_name="story.mp3")