File size: 2,029 Bytes
885aabb
 
 
 
 
 
 
 
 
6d18020
3f152b0
885aabb
 
 
 
 
 
3f152b0
 
6d18020
 
3f152b0
885aabb
 
 
 
 
 
 
 
 
3f152b0
 
 
885aabb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f152b0
 
 
 
885aabb
 
3f152b0
885aabb
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import os
from PIL import Image

# Load models
@st.cache_resource
def load_models():
    """Build the image-captioning and story-generation pipelines.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached as a shared resource; without that, both models
    would be re-instantiated on each rerun.

    Returns:
        A ``(image_to_text, storyteller)`` tuple: the BLIP "image-to-text"
        pipeline and the GPT-Neo "text-generation" pipeline (configured for
        up to 200 new tokens).
    """
    image_to_text = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    storyteller = pipeline(
        "text-generation",
        model="EleutherAI/gpt-neo-2.7B",
        max_new_tokens=200,
    )
    return image_to_text, storyteller

# Process image to text
def generate_caption(image, image_to_text):
    """Caption *image* using the supplied image-to-text callable.

    Args:
        image: The image object handed straight to ``image_to_text``.
        image_to_text: Callable returning a sequence of dicts that carry a
            ``"generated_text"`` entry.

    Returns:
        The ``"generated_text"`` of the first prediction, or
        ``"No caption generated."`` when the callable returns nothing.
    """
    predictions = image_to_text(image)
    if not predictions:
        return "No caption generated."
    first_prediction = predictions[0]
    return first_prediction["generated_text"]

# Generate a narrative story
def generate_story(text, storyteller):
    """Generate a short story from an image description.

    Args:
        text: Image description inserted into the story prompt.
        storyteller: Text-generation callable. It is invoked with the prompt
            plus sampling keyword arguments and is expected to return a
            sequence of dicts carrying a ``"generated_text"`` entry.

    Returns:
        The generated continuation with surrounding whitespace removed, or
        ``"No story generated."`` when nothing usable comes back.
    """
    prompt = f"Write a short, engaging story based on this image description: {text}"
    outputs = storyteller(
        prompt,
        do_sample=True,
        temperature=0.7,
        max_new_tokens=200,
        # Text-generation pipelines echo the prompt in front of the output by
        # default; request only the continuation so the instruction sentence
        # is neither displayed nor read aloud as part of the story.
        return_full_text=False,
    )
    if not outputs:
        return "No story generated."

    story = outputs[0]["generated_text"].strip()
    # A whitespace-only continuation is as useless as no result at all.
    return story or "No story generated."

# Convert text to speech
def text_to_speech(text, filename="output.mp3"):
    """Synthesize *text* with gTTS and save the audio to *filename*.

    Args:
        text: The text to convert to speech.
        filename: Destination path for the saved audio.

    Returns:
        The ``filename`` that was written, unchanged.
    """
    gTTS(text).save(filename)
    return filename

# Main Streamlit app
def main():
    """Render the Streamlit page.

    Loads the models, waits for an image upload, then shows the image, its
    caption, a story generated from the caption, and an audio player for
    the spoken story.
    """
    st.title("AI-Powered Image Captioning and Storytelling")

    captioner, story_model = load_models()

    upload = st.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])
    if upload is None:
        # Nothing to process until the user provides a file.
        return

    # Decode the upload into a PIL image and echo it back to the user.
    picture = Image.open(upload)
    st.image(picture, caption="Uploaded Image", use_column_width=True)

    with st.spinner("Generating caption..."):
        caption_text = generate_caption(picture, captioner)
        st.write("### Image Caption:")
        st.write(caption_text)

    with st.spinner("Generating story..."):
        story_text = generate_story(caption_text, story_model)
        st.write("### Generated Story:")
        st.write(story_text)

    with st.spinner("Generating speech..."):
        audio_path = text_to_speech(story_text)
        st.audio(audio_path, format="audio/mp3")

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()