Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,11 +4,11 @@ from transformers import pipeline
|
|
4 |
from gtts import gTTS
|
5 |
import os
|
6 |
|
7 |
-
#
|
8 |
# Captioning pipeline; img2text() calls it to describe an image.
image_to_text_model = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
)
# Text-generation pipeline; text2story() calls it with a prompt.
story_generator = pipeline(
    "text-generation",
    model="facebook/opt-1.3b",
)
|
10 |
|
11 |
-
#
|
12 |
def img2text(image_path):
    """Return the caption the image-to-text pipeline generates for an image.

    Args:
        image_path: Image input handed unchanged to ``image_to_text_model``
            (the caller in this file passes a path on disk).

    Returns:
        The ``"generated_text"`` value of the first pipeline result.
    """
    results = image_to_text_model(image_path)
    first_result = results[0]
    return first_result["generated_text"]
|
@@ -19,14 +19,20 @@ def text2story(text):
|
|
19 |
story = story_generator(prompt, max_length=250, do_sample=True, temperature=0.8, top_p=0.9, repetition_penalty=1.2, truncation=True)[0]['generated_text']
|
20 |
return story
|
21 |
|
22 |
-
# Story →
|
23 |
def text2audio_gtts(story_text, filename="story.mp3"):
    """Synthesize English speech for a story with gTTS and save it to a file.

    Args:
        story_text: Text to speak.
        filename: Path the audio is written to; an existing file at this
            path is removed first.

    Returns:
        ``filename``, unchanged.
    """
    # Clear any output left over from a previous run before writing again.
    stale_output_present = os.path.exists(filename)
    if stale_output_present:
        os.remove(filename)

    speech = gTTS(text=story_text, lang="en")
    speech.save(filename)
    return filename
|
31 |
|
32 |
# Streamlit Web UI
|
@@ -36,26 +42,33 @@ st.header("๐ AI Storyteller: Turn Your Image into a Story with Audio")
|
|
36 |
uploaded_file = st.file_uploader("Upload an Image...", type=["jpg", "png"])

if uploaded_file:
    # Write the upload to a local file; img2text below is called with this path.
    image_path = "uploaded_image.jpg"
    with open(image_path, "wb") as image_out:
        image_out.write(uploaded_file.getbuffer())

    # Re-open the saved file and show it on the page.
    image = Image.open(image_path)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Step 1: caption the image.
    st.text("๐ Generating image caption...")
    caption = img2text(image_path)
    st.write("**Image Description:**", caption)

    # Step 2: turn the caption into a story.
    st.text("๐ Generating story...")
    story = text2story(caption)
    st.write("**Generated Story:**")
    st.write(story)

    # Step 3: synthesize the story as audio.
    st.text("๐ Generating audio...")
    audio_file = text2audio_gtts(story)

    # In-page player for the generated file.
    st.audio(audio_file, format="audio/mp3")

    # Offer the same file as a download.
    with open(audio_file, "rb") as audio_in:
        st.download_button("๐ฅ Download Audio", audio_in, file_name="story.mp3")
|
|
|
4 |
from gtts import gTTS
|
5 |
import os
|
6 |
|
7 |
+
# Load the Hugging Face models
|
8 |
# Captioning pipeline; img2text() calls it to describe an image.
image_to_text_model = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
)
# Text-generation pipeline; text2story() calls it with a prompt.
story_generator = pipeline(
    "text-generation",
    model="facebook/opt-1.3b",
)
|
10 |
|
11 |
+
# Image → text (generate a caption)
|
12 |
def img2text(image_path):
    """Return the caption the image-to-text pipeline generates for an image.

    Args:
        image_path: Image input handed unchanged to ``image_to_text_model``
            (the caller in this file passes a path on disk).

    Returns:
        The ``"generated_text"`` value of the first pipeline result.
    """
    results = image_to_text_model(image_path)
    first_result = results[0]
    return first_result["generated_text"]
|
|
|
19 |
story = story_generator(prompt, max_length=250, do_sample=True, temperature=0.8, top_p=0.9, repetition_penalty=1.2, truncation=True)[0]['generated_text']
|
20 |
return story
|
21 |
|
22 |
+
# Story → speech (TTS)
|
23 |
def text2audio_gtts(story_text, filename="story.mp3", max_chars=500):
    """Synthesize English speech for a story with gTTS and save it to a file.

    Args:
        story_text: Text to speak. Only the first ``max_chars`` characters
            are synthesized.
        filename: Path the audio is written to; an existing file at this
            path is removed first.
        max_chars: Maximum number of characters passed to gTTS, or ``None``
            to pass the full text. Defaults to 500, the limit that was
            previously hard-coded.

    Returns:
        ``filename``, unchanged.

    Raises:
        ValueError: If ``max_chars`` is negative, or if no speakable text
            remains after truncation.
    """
    if max_chars is not None and max_chars < 0:
        # A negative slice bound would silently chop the END of the text.
        raise ValueError("max_chars must be non-negative or None")

    # Clear any previous output first (original intent: avoid file
    # conflicts). Attempting the removal directly avoids the race between a
    # separate exists() check and remove().
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    # Cap the text length.
    # NOTE(review): the original comment said gTTS may not support overly
    # long text -- confirm whether the cap is still required.
    if max_chars is not None:
        story_text = story_text[:max_chars]

    # Fail with a clear message instead of letting blank text reach gTTS.
    if not story_text.strip():
        raise ValueError("story_text contains no text to synthesize")

    tts = gTTS(text=story_text, lang="en")
    tts.save(filename)

    return filename
|
37 |
|
38 |
# Streamlit Web UI
|
|
|
42 |
uploaded_file = st.file_uploader("Upload an Image...", type=["jpg", "png"])

if uploaded_file:
    # Save the uploaded image locally; img2text below is called with this path.
    image_path = "uploaded_image.jpg"
    with open(image_path, "wb") as image_out:
        image_out.write(uploaded_file.getbuffer())

    # Read the saved image back and display it.
    image = Image.open(image_path)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Generate the image caption (img2text receives the file path).
    st.text("๐ Generating image caption...")
    caption = img2text(image_path)
    st.write("**Image Description:**", caption)

    # Generate the story from the caption.
    st.text("๐ Generating story...")
    story = text2story(caption)
    st.write("**Generated Story:**")
    st.write(story)

    # Generate the audio for the story.
    st.text("๐ Generating audio...")
    audio_file = text2audio_gtts(story)

    # Play the audio in the page.
    st.audio(audio_file, format="audio/mp3")

    # Download button for the same file.
    with open(audio_file, "rb") as audio_in:
        st.download_button("๐ฅ Download Audio", audio_in, file_name="story.mp3")
|