File size: 3,551 Bytes
cfb477a
 
 
 
 
0559d83
cfb477a
27fec73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0e9bb3
27fec73
 
 
 
 
 
cfb477a
fab3813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27fec73
083ac07
27fec73
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import os
# NOTE(review): this shells out to pip every time the module is executed, and
# only after `transformers` has already been imported above, so the pin
# presumably cannot affect the copy already loaded in this process. Pinning the
# version in requirements.txt looks like the intended fix; confirm before removing.
os.system("pip install transformers==4.36.2")

def main():
    """Run the AI Storyteller Streamlit app.

    Renders the page, lets the user upload a JPG/PNG image, captions it with
    a BLIP image-to-text pipeline, expands the caption into a short story with
    an OPT text-generation pipeline, and converts the story to speech with
    gTTS. The uploaded image and the generated audio are kept in memory, so
    nothing is written to fixed file names shared between sessions.
    """
    import io  # local import: only needed for the in-memory MP3 buffer

    st.set_page_config(page_title="AI Storyteller", page_icon="📖")

    # Page styling: background image plus centered, white h1/h3 headings.
    page_bg_img = """
    <style>
    [data-testid="stAppViewContainer"] {
        background-image: url("https://your-background-image-url.com");
        background-size: cover;
    }
    h1, h3 {
        text-align: center;
        color: white;
    }
    </style>
    """
    st.markdown(page_bg_img, unsafe_allow_html=True)

    @st.cache_resource
    def load_models():
        """Build both Hugging Face pipelines once and reuse them across reruns."""
        captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        generator = pipeline("text-generation", model="facebook/opt-350m", device=-1)
        return captioner, generator

    image_to_text_model, story_generator = load_models()

    def img2text(image):
        """Return the caption the image-to-text pipeline generates for *image*."""
        return image_to_text_model(image)[0]["generated_text"]

    def text2story(text):
        """Return a story that begins with the fixed opening and builds on *text*.

        Only the newly generated continuation is requested from the pipeline,
        so the instruction part of the prompt is neither shown nor read aloud.
        """
        opening = "Once upon a time..."
        prompt = f"Write a fun and magical children's story based on this idea: {text}.\n\n{opening}"
        continuation = story_generator(
            prompt,
            max_length=300,
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            repetition_penalty=1.2,
            return_full_text=False,
        )[0]["generated_text"]
        return (opening + continuation).strip()

    def text2audio_gtts(story_text, max_chars=500):
        """Return MP3 bytes of *story_text* spoken in English.

        The text is capped at *max_chars* characters (the original author's
        guard against gTTS failing on long input); the cut is moved back to
        the previous space so the audio does not end in the middle of a word.
        """
        if len(story_text) > max_chars:
            clipped = story_text[:max_chars]
            head, _, _ = clipped.rpartition(" ")
            # Fall back to the hard cut when there is no usable word boundary.
            story_text = head.rstrip() or clipped

        buffer = io.BytesIO()
        gTTS(text=story_text, lang="en").write_to_fp(buffer)
        return buffer.getvalue()

    # Streamlit web UI
    st.header("📖 AI Storyteller: Turn Your Image into a Story with Audio")

    uploaded_file = st.file_uploader("Upload image below", type=["jpg", "png"])
    if not uploaded_file:
        return

    # NOTE(review): Streamlit re-executes this script on widget interaction, so
    # the caption, story and audio below are presumably regenerated (with a
    # newly sampled story) after e.g. a download click. Consider keeping the
    # results in st.session_state if that matters.

    # Read the upload directly; no copy is saved to disk.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Generate the image caption (RGB so PNGs with alpha/palette are handled).
    st.text("🔍 Generating image caption...")
    caption = img2text(image.convert("RGB"))
    st.write("**Image Description:**", caption)

    # Generate the story
    st.text("📝 Generating story...")
    story = text2story(caption)
    st.write("**Generated Story:**")
    st.write(story)

    # Generate the audio
    st.text("🔊 Generating audio...")
    audio_bytes = text2audio_gtts(story)

    # Play the audio
    st.audio(audio_bytes, format="audio/mp3")

    # Offer the audio as a download
    st.download_button("📥 Download Audio", audio_bytes, file_name="story.mp3")

# Standard entry-point guard: call main() only when this file is executed as
# the main program, not when it is imported as a module.
if __name__ == "__main__":
    main()