File size: 2,433 Bytes
cfb477a
 
 
 
 
0559d83
cfb477a
ee966b7
cfb477a
 
 
239d642
cfb477a
 
 
 
 
 
 
 
 
 
239d642
cfb477a
239d642
cfb477a
 
 
239d642
 
 
 
 
cfb477a
 
239d642
cfb477a
 
 
 
 
 
 
 
 
239d642
cfb477a
 
 
 
239d642
cfb477a
 
 
239d642
cfb477a
239d642
cfb477a
 
239d642
cfb477a
 
 
 
 
239d642
cfb477a
 
 
239d642
cfb477a
 
239d642
cfb477a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import os
# NOTE(review): this call executes after `transformers` has already been
# imported earlier in this file, so it cannot change the version loaded in the
# current process, and it invokes pip every time the script is executed.
# Presumably the intent is to pin the library version; prefer declaring
# transformers==4.36.2 in the deployment's dependency file — TODO confirm and
# remove this call.
os.system("pip install transformers==4.36.2")

# Load the Hugging Face models
@st.cache_resource(show_spinner=False)
def _load_pipelines():
    """Build the captioning and story-generation pipelines once.

    Streamlit re-executes this script from the top on every user interaction.
    Caching the pipelines as a resource keeps both models in memory instead of
    re-creating them on each rerun. The spinner is disabled so that no UI
    element is rendered by this call.

    Returns:
        A ``(captioner, generator)`` tuple: the image-to-text pipeline and the
        text-generation pipeline.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    generator = pipeline("text-generation", model="facebook/opt-1.3b")
    return captioner, generator


# Module-level names are kept so the functions in this file can keep using them.
image_to_text_model, story_generator = _load_pipelines()

# Image -> text (generate a caption)
def img2text(image_path):
    """Caption an image with the module-level image-to-text pipeline.

    Args:
        image_path: Image input, forwarded unchanged to
            ``image_to_text_model``.

    Returns:
        The ``"generated_text"`` value of the first result returned by the
        pipeline.
    """
    first_result = image_to_text_model(image_path)[0]
    return first_result["generated_text"]

# Text -> story (generate the full story)
def text2story(text):
    """Generate a children's story from a short idea.

    The prompt sent to ``story_generator`` is an instruction sentence built
    from *text*, followed by the opening "Once upon a time...". The instruction
    sentence is removed from the result so that the returned story starts at
    the opening rather than at the instruction.

    Args:
        text: The idea to base the story on (for example an image caption).

    Returns:
        The generated story as a string. If the generated text does not start
        with the instruction (e.g. the prompt was truncated), the generated
        text is returned unchanged.
    """
    instruction = (
        "Write a fun and magical children's story based on this idea: "
        f"{text}.\n\n"
    )
    prompt = instruction + "Once upon a time..."

    outputs = story_generator(
        prompt,
        max_length=250,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        repetition_penalty=1.2,
        truncation=True,
    )
    story = outputs[0]["generated_text"]

    # By default the text-generation pipeline returns the prompt followed by
    # the continuation; drop the instruction so it is neither displayed nor
    # read aloud as part of the story.
    if story.startswith(instruction):
        story = story[len(instruction):]
    return story

# Story -> speech (TTS)
def text2audio_gtts(story_text, filename="story.mp3", max_chars=500):
    """Convert story text to an MP3 file using gTTS.

    Args:
        story_text: The text to be spoken.
        filename: Path of the MP3 file to write. An existing file at this path
            is removed first.
        max_chars: Maximum number of characters passed to gTTS. Longer text is
            shortened, backing up to the previous whitespace when the cut
            would otherwise fall inside a word.

    Returns:
        ``filename``, the path of the written audio file.
    """
    # Avoid file conflicts: remove any previous output. Attempting the removal
    # directly (instead of checking for existence first) avoids a race between
    # the check and the delete.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    # Limit the TTS text length (gTTS may not handle overly long text).
    if len(story_text) > max_chars:
        clipped = story_text[:max_chars]
        # If the cut lands inside a word, drop the partial word so the audio
        # does not end mid-word. A single unbroken word is kept as-is.
        if not story_text[max_chars].isspace():
            parts = clipped.rsplit(None, 1)
            if len(parts) == 2:
                clipped = parts[0]
        story_text = clipped

    # Generate the speech and write it to disk.
    tts = gTTS(text=story_text, lang="en")
    tts.save(filename)

    return filename

# Streamlit Web UI
# Page setup. The emoji literals are written as real Unicode characters; the
# previous mis-encoded byte sequences were not valid emoji.
st.set_page_config(page_title="AI Storyteller", page_icon="📖")
st.header("📖 AI Storyteller: Turn Your Image into a Story with Audio")

# "jpeg" is listed explicitly next to "jpg" so both common extensions are accepted.
uploaded_file = st.file_uploader("Upload an Image...", type=["jpg", "jpeg", "png"])

if uploaded_file:
    # Save the image locally; the captioning step is given a file path.
    image_bytes = uploaded_file.getvalue()
    image_path = "uploaded_image.jpg"
    with open(image_path, "wb") as f:
        f.write(image_bytes)

    # Read back and display the image.
    image = Image.open(image_path)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Streamlit re-runs this script on every interaction (including a click on
    # the download button). Because story generation is sampled, regenerating
    # on each rerun would replace the story the user just saw. Results are
    # therefore kept in the session state and only recomputed for a new upload.
    upload_id = (uploaded_file.name, len(image_bytes), hash(image_bytes))
    is_new_upload = st.session_state.get("storyteller_upload_id") != upload_id

    # Generate the image caption.
    if is_new_upload:
        st.text("🔍 Generating image caption...")
        st.session_state["storyteller_caption"] = img2text(image_path)
    caption = st.session_state["storyteller_caption"]
    st.write("**Image Description:**", caption)

    # Generate the story.
    if is_new_upload:
        st.text("📝 Generating story...")
        st.session_state["storyteller_story"] = text2story(caption)
    story = st.session_state["storyteller_story"]
    st.write("**Generated Story:**")
    st.write(story)

    # Generate the audio and keep its bytes, so playback and download do not
    # depend on the shared output file staying unchanged on disk.
    if is_new_upload:
        st.text("🔊 Generating audio...")
        audio_file = text2audio_gtts(story)
        with open(audio_file, "rb") as audio_fh:
            st.session_state["storyteller_audio"] = audio_fh.read()
        # Recorded last: if any step above fails, the next run retries.
        st.session_state["storyteller_upload_id"] = upload_id
    audio_bytes = st.session_state["storyteller_audio"]

    # Play the audio.
    st.audio(audio_bytes, format="audio/mp3")

    # Download button.
    st.download_button("📥 Download Audio", audio_bytes, file_name="story.mp3")