CR7CAD committed on
Commit
cd245d5
·
verified ·
1 Parent(s): 8d5fabf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -99
app.py CHANGED
@@ -1,119 +1,166 @@
 
1
  import streamlit as st
2
  from transformers import pipeline
 
3
  from PIL import Image
4
  import io
5
- from gtts import gTTS
6
- import time
 
 
7
 
8
- # Set page title
9
- st.set_page_config(page_title="Kids Story Generator")
 
 
 
 
10
 
11
- # Title and introduction
12
- st.title("Kids Story Generator")
13
- st.write("Upload a picture and let's create a magical story!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # Initialize models
16
- @st.cache_resource
17
- def load_models():
18
- image_to_text = pipeline("image-to-text", model="microsoft/git-base-coco")
19
- story_generator = pipeline("text-generation", model="gpt2")
20
- return image_to_text, story_generator
 
 
 
 
 
 
 
 
 
21
 
22
- image_to_text, story_generator = load_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Function to generate caption from image
25
- def generate_caption(image):
26
- try:
27
- caption = image_to_text(image)[0]['generated_text']
28
- return caption
29
- except Exception as e:
30
- st.error(f"Error generating caption: {str(e)}")
31
- return "children playing in a colorful park"
32
 
33
- # Function to generate story from caption (less than 100 words)
34
- def generate_story(caption):
35
- try:
36
- prompt = f"Once upon a time, {caption} "
37
-
38
- # Set parameters for faster generation while keeping quality
39
- story = story_generator(
40
- prompt,
41
- max_length=100,
42
- do_sample=True,
43
- temperature=0.7, # Lower temperature for faster generation
44
- top_p=0.9,
45
- num_return_sequences=1
46
- )[0]['generated_text']
47
-
48
- # Ensure story doesn't exceed 100 words
49
- words = story.split()
50
- if len(words) > 100:
51
- words = words[:100]
52
- story = " ".join(words)
53
- # Add period to the end if needed
54
- if not story.endswith(('.', '!', '?')):
55
- story += '.'
56
-
57
- return story
58
- except Exception as e:
59
- st.error(f"Error generating story: {str(e)}")
60
- return f"Once upon a time, {caption}. Something magical happened and everyone lived happily ever after."
61
 
62
- # Function to convert text to speech
63
- def text_to_speech(text):
64
- try:
65
- tts = gTTS(text=text, lang='en', slow=False)
66
- audio_file = "story_audio.mp3"
67
- tts.save(audio_file)
68
- return audio_file
69
- except Exception as e:
70
- st.error(f"Error generating audio: {str(e)}")
71
- return None
72
 
73
- # File uploader
74
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
75
 
76
  if uploaded_file is not None:
77
- try:
 
78
  # Display the uploaded image
79
- image = Image.open(uploaded_file)
80
- st.image(image, caption='Uploaded Image', use_container_width=True)
 
 
 
 
 
 
 
 
81
 
82
- # Generate button
83
- if st.button("Generate Story"):
84
- # Use progress bar for better UX
85
- progress_bar = st.progress(0)
86
-
87
- # Generate caption
88
- progress_bar.progress(25)
89
- st.text('Analyzing image...')
90
- caption = generate_caption(image)
91
- st.write("Image caption:", caption)
92
-
93
- # Generate story
94
- progress_bar.progress(50)
95
- st.text('Creating story...')
96
- story = generate_story(caption)
97
- word_count = len(story.split())
98
- st.write(f"### Your Story ({word_count} words)")
99
  st.write(story)
100
-
101
- # Generate audio
102
- progress_bar.progress(75)
103
- st.text('Generating audio...')
104
- audio_file = text_to_speech(story)
105
-
106
- # Display audio
107
- if audio_file:
108
- progress_bar.progress(100)
109
- st.write("### Listen to your story")
110
- st.audio(audio_file)
111
-
112
- # Clear progress when done
113
- progress_bar.empty()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- except Exception as e:
116
- st.error(f"An error occurred: {str(e)}")
 
117
 
 
118
  st.markdown("---")
119
- st.write("Created for ISOM5240 Assignment")
 
1
+ # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
+ import torch
5
  from PIL import Image
6
  import io
7
+ import os
8
+ from huggingface_hub import InferenceClient
9
+ import numpy as np
10
+ import base64
11
 
12
+ # function part
13
+ # img2text
14
+ def img2text(image_path):
15
+ image_to_text = pipeline("image-to-text", model="noamrot/FuseCap_Image_Captioning")
16
+ text = image_to_text(image_path)[0]["generated_text"]
17
+ return text
18
 
19
+ # text2story
20
+ def text2story(text):
21
+ # Using Llama model through API to avoid GGUF format complexities in Streamlit
22
+ client = InferenceClient(model="MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF")
23
+
24
+ # Create a prompt for the story generation
25
+ prompt = f"""Write a fun, engaging children's story of about 100 words based on this caption:
26
+ "{text}"
27
+
28
+ The story should be suitable for kids aged 3-10 years old, with simple language, positive themes, and a clear beginning, middle, and end.
29
+ """
30
+
31
+ # Generate the story
32
+ story_text = client.text_generation(
33
+ prompt,
34
+ max_new_tokens=250,
35
+ temperature=0.7,
36
+ top_p=0.9,
37
+ repetition_penalty=1.2
38
+ )
39
+
40
+ return story_text
41
 
42
+ # text2audio
43
+ def text2audio(story_text):
44
+ # Using Bark text-to-speech model
45
+ tts = pipeline("text-to-speech", model="suno/bark")
46
+
47
+ # Generate audio with a voice suitable for children's stories
48
+ audio_output = tts(
49
+ text=story_text,
50
+ forward_params={"speaker": "v2/en_speaker_6", "text_temp": 0.7}
51
+ )
52
+
53
+ return {
54
+ "audio": audio_output["audio"],
55
+ "sampling_rate": audio_output["sampling_rate"]
56
+ }
57
 
58
+ # Function to save temporary image file
59
+ def save_uploaded_image(uploaded_file):
60
+ # Create a temp directory if it doesn't exist
61
+ if not os.path.exists("temp"):
62
+ os.makedirs("temp")
63
+
64
+ # Define the path to save the image
65
+ image_path = os.path.join("temp", uploaded_file.name)
66
+
67
+ # Save the image
68
+ with open(image_path, "wb") as f:
69
+ f.write(uploaded_file.getvalue())
70
+
71
+ return image_path
72
 
73
+ # main part
74
+ st.set_page_config(
75
+ page_title="Kids Storytelling Magic",
76
+ page_icon="📚",
77
+ layout="centered"
78
+ )
 
 
79
 
80
+ # Add some CSS for a child-friendly interface
81
+ st.markdown("""
82
+ <style>
83
+ .main {
84
+ background-color: #f0f8ff;
85
+ }
86
+ h1, h2, h3 {
87
+ color: #1e90ff;
88
+ }
89
+ .stButton>button {
90
+ background-color: #ff6b6b;
91
+ color: white;
92
+ font-size: 1.2rem;
93
+ border-radius: 10px;
94
+ padding: 0.5rem 1rem;
95
+ }
96
+ </style>
97
+ """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
98
 
99
+ st.title("🧸 Kids Storytelling Magic 🦄")
100
+ st.subheader("Upload a picture and hear a magical story!")
 
 
 
 
 
 
 
 
101
 
102
+ uploaded_file = st.file_uploader("Choose a fun picture...", type=["jpg", "jpeg", "png"])
 
103
 
104
  if uploaded_file is not None:
105
+ # Display a loading spinner
106
+ with st.spinner("Working on your magical story..."):
107
  # Display the uploaded image
108
+ st.image(uploaded_file, caption="Your magical picture", use_column_width=True)
109
+
110
+ # Save the image temporarily
111
+ image_path = save_uploaded_image(uploaded_file)
112
+
113
+ # Stage 1: Image to Text
114
+ with st.spinner("Looking at your picture..."):
115
+ caption = img2text(image_path)
116
+ st.markdown("### 📝 I see...")
117
+ st.write(caption)
118
 
119
+ # Stage 2: Text to Story
120
+ with st.spinner("Creating your story..."):
121
+ story = text2story(caption)
122
+ st.markdown("### 📖 Your Story")
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  st.write(story)
124
+
125
+ # Stage 3: Story to Audio data
126
+ with st.spinner("Making your story speak..."):
127
+ try:
128
+ audio_data = text2audio(story)
129
+
130
+ # Add a play button with cute icon
131
+ st.markdown("### 🔊 Listen to your story")
132
+ if st.button("🎵 Play Story"):
133
+ st.audio(
134
+ audio_data["audio"],
135
+ format="audio/wav",
136
+ start_time=0,
137
+ sample_rate=audio_data["sampling_rate"]
138
+ )
139
+ except Exception as e:
140
+ st.error(f"Oops! Something went wrong with the audio: {str(e)}")
141
+ st.write("But you can still read the story above!")
142
+
143
+ # Clean up - delete the temporary image
144
+ try:
145
+ os.remove(image_path)
146
+ except:
147
+ pass
148
+
149
+ else:
150
+ # Show instructions with a friendly message
151
+ st.markdown("""
152
+ ### How to use:
153
+ 1. Click the button above to upload a picture
154
+ 2. Wait for the magical story to appear
155
+ 3. Press play to hear your story!
156
+
157
+ Try pictures of animals, nature, toys, or anything fun!
158
+ """)
159
 
160
+ # Show a placeholder image
161
+ st.image("https://placehold.co/600x400/9370db/ffffff?text=Upload+an+image+to+start+the+magic!",
162
+ caption="Ready for your picture!", use_column_width=True)
163
 
164
+ # Add a footer
165
  st.markdown("---")
166
+ st.markdown("Made for kids to enjoy the stories")