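# NOTE: the following metadata came from the repository file viewer and is
# not part of the program: author "Eason918", commit f6a48f5 ("Update app.py",
# verified), file size 2.42 kB.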
import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import os
# NOTE(review): this runs after `transformers` has already been imported
# above, so it cannot change the version used by the current process, and it
# is re-executed on every Streamlit rerun. Pinning the version in the
# project's requirements would be the usual approach. Kept unchanged here so
# the environment side effect callers may rely on is preserved.
os.system("pip install transformers==4.36.2")


# show_spinner=False so that no UI element is emitted before
# st.set_page_config() is called further down in the script.
@st.cache_resource(show_spinner=False)
def _load_pipelines():
    """Build the captioning and story-generation pipelines.

    Streamlit re-executes the whole script on every user interaction;
    caching the pipelines as a resource avoids rebuilding both models on
    each rerun.

    Returns:
        A ``(image_to_text_model, story_generator)`` tuple of pipelines.
    """
    captioner = pipeline(
        "image-to-text", model="Salesforce/blip-image-captioning-base"
    )
    generator = pipeline("text-generation", model="facebook/opt-1.3b")
    return captioner, generator


# Load the Hugging Face models under the same module-level names as before.
image_to_text_model, story_generator = _load_pipelines()
# Image -> text (generate a caption)
def img2text(image_path):
    """Return the caption produced by ``image_to_text_model`` for an image.

    Args:
        image_path: Value handed straight to the captioning pipeline (the
            caller in this file passes a path to an image on disk).

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    results = image_to_text_model(image_path)
    first_result = results[0]
    return first_result["generated_text"]
# Text -> story (generate a full story)
def text2story(text):
    """Generate a short children's story seeded by *text*.

    The model is prompted with an instruction sentence followed by the
    opening "Once upon a time...". The text-generation pipeline normally
    echoes the whole prompt at the start of ``generated_text``; the
    instruction sentence is stripped from the result so that the displayed
    (and spoken) story begins at "Once upon a time..." rather than with the
    instruction given to the model.

    Args:
        text: Idea for the story (the image caption in this app).

    Returns:
        The story text, starting with the "Once upon a time..." opening
        whenever the pipeline echoed the prompt.
    """
    instruction = (
        "Write a fun and magical children's story based on this idea: "
        f"{text}.\n\n"
    )
    prompt = instruction + "Once upon a time..."
    outputs = story_generator(
        prompt,
        max_length=250,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        repetition_penalty=1.2,
    )
    story = outputs[0]["generated_text"]
    # Only strip when the echo is really there, so a pipeline configured to
    # return just the continuation is passed through untouched.
    if story.startswith(instruction):
        story = story[len(instruction):]
    return story
# Story -> speech (TTS)
def text2audio_gtts(story_text, filename="story.mp3", max_chars=500):
    """Synthesize English speech for *story_text* with gTTS and save it.

    Args:
        story_text: Text to read aloud.
        filename: Output path; an existing file at that path is replaced.
        max_chars: Upper bound on the number of characters sent to gTTS
            (gTTS may not cope well with very long text).

    Returns:
        The ``filename`` that was written.

    Raises:
        ValueError: If nothing is left to speak after trimming.
    """
    # Limit the TTS text length. When clipping actually happens, back up to
    # the last space so the audio does not end in the middle of a word.
    speech_text = story_text[:max_chars]
    if len(story_text) > max_chars:
        last_space = speech_text.rfind(" ")
        if last_space > 0:
            speech_text = speech_text[:last_space]
    speech_text = speech_text.strip()
    if not speech_text:
        raise ValueError("story_text is empty; nothing to convert to speech")

    # Avoid file conflicts: drop any stale output first. Attempting the
    # removal directly avoids the check-then-delete race of os.path.exists().
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    # Generate the speech and write it to disk.
    tts = gTTS(text=speech_text, lang="en")
    tts.save(filename)
    return filename
# Streamlit Web UI
# Page setup. The emoji literals below were mis-encoded (mojibake) and have
# been restored; page_icon in particular is meant to be a single emoji.
st.set_page_config(page_title="AI Storyteller", page_icon="📖")
st.header("📖 AI Storyteller: Turn Your Image into a Story with Audio")
uploaded_file = st.file_uploader("Upload an Image...", type=["jpg", "png"])

if uploaded_file:
    # Save the upload to local disk; the caption step is given a file path.
    image_path = "uploaded_image.jpg"
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Read the saved image back and display it.
    image = Image.open(image_path)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Generate the image caption (from the file path, not the PIL image).
    st.text("🔍 Generating image caption...")
    caption = img2text(image_path)
    st.write("**Image Description:**", caption)

    # Generate the story from the caption.
    st.text("📝 Generating story...")
    story = text2story(caption)
    st.write("**Generated Story:**")
    st.write(story)

    # Generate the narration audio.
    st.text("🔊 Generating audio...")
    audio_file = text2audio_gtts(story)

    # Play the audio in the page.
    st.audio(audio_file, format="audio/mp3")

    # Offer the same MP3 as a download.
    with open(audio_file, "rb") as audio_fh:
        st.download_button("📥 Download Audio", audio_fh, file_name="story.mp3")