CR7CAD committed on
Commit
8d5fabf
·
verified ·
1 Parent(s): 4e37056

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -98
app.py CHANGED
@@ -1,115 +1,119 @@
1
  import streamlit as st
 
2
  from PIL import Image
3
- import os
4
- import tempfile
5
- import sys
6
 
7
- # function part
8
- # img2text with a model that doesn't require sentencepiece
9
- def img2text(image_path):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  try:
11
- from transformers import pipeline
12
-
13
- # Use the Salesforce model instead of Donut to avoid sentencepiece issues
14
- st.info("Using Salesforce/blip-image-captioning-base model for image-to-text")
15
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
16
-
17
- # Open the image file
18
- image = Image.open(image_path)
19
-
20
- # Extract text from the image
21
- result = image_to_text_model(image)
22
-
23
- # Get the generated text
24
- text = result[0]["generated_text"] if result else "No text detected"
25
- return text
26
  except Exception as e:
27
- st.error(f"Error processing image: {str(e)}")
28
- return f"Error: {str(e)}"
29
-
30
- # text2story
31
- def text2story(text):
32
- # For now, just return the extracted text as the story
33
- story_text = f"Here's a story based on the text: {text}"
34
- return story_text
35
 
36
- # text2audio using Google Text-to-Speech
37
- def text2audio(story_text):
38
  try:
39
- from gtts import gTTS
40
 
41
- # Create a temporary file
42
- temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
43
- temp_audio_path = temp_audio.name
44
- temp_audio.close()
 
 
 
 
 
45
 
46
- # Initialize gTTS and generate audio
47
- tts = gTTS(text=story_text, lang='en', slow=False)
 
 
 
 
 
 
48
 
49
- # Save to the temporary file
50
- tts.save(temp_audio_path)
51
-
52
- return temp_audio_path
 
 
 
 
 
 
 
 
53
  except Exception as e:
54
  st.error(f"Error generating audio: {str(e)}")
55
  return None
56
 
57
- # main part
58
- st.set_page_config(page_title="Your Image to Audio Story",
59
- page_icon="🦜")
60
- st.header("Turn Your Image to Audio Story")
61
- st.subheader("Image to Text to Audio Conversion")
62
-
63
- uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
64
 
65
  if uploaded_file is not None:
66
- # Save the uploaded file temporarily
67
- bytes_data = uploaded_file.getvalue()
68
- image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
69
- with open(image_temp_path, "wb") as file:
70
- file.write(bytes_data)
71
-
72
- # Display the uploaded image
73
- st.image(uploaded_file, caption="Uploaded Image",
74
- use_column_width=True)
75
-
76
- # Stage 1: Image to Text
77
- with st.spinner('Processing img2text...'):
78
- extracted_text = img2text(image_temp_path)
79
- st.subheader("Extracted Text:")
80
- st.write(extracted_text)
81
-
82
- # Stage 2: Text to Story
83
- with st.spinner('Generating a story...'):
84
- story = text2story(extracted_text)
85
- st.subheader("Generated Story:")
86
- st.write(story)
87
-
88
- # Stage 3: Story to Audio data
89
- audio_file_path = None
90
- with st.spinner('Generating audio data...'):
91
- audio_file_path = text2audio(story)
92
-
93
- # Remove the temporary image file
94
- if os.path.exists(image_temp_path):
95
- os.remove(image_temp_path)
96
-
97
- # Play button
98
- if st.button("Play Audio"):
99
- if audio_file_path and os.path.exists(audio_file_path):
100
- # Play the generated audio
101
- with open(audio_file_path, "rb") as audio_file:
102
- audio_bytes = audio_file.read()
103
- st.audio(audio_bytes, format="audio/mp3")
104
 
105
- # Clean up the audio file after playing
106
- try:
107
- os.remove(audio_file_path)
108
- except:
109
- pass
110
- else:
111
- st.warning("Audio generation failed. Playing a placeholder audio.")
112
- try:
113
- st.audio("kids_playing_audio.wav")
114
- except FileNotFoundError:
115
- st.error("Placeholder audio file not found. Audio playback is unavailable.")
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
  from PIL import Image
4
+ import io
5
+ from gtts import gTTS
6
+ import time
7
 
8
# Page configuration and landing copy.
# The browser-tab title and the on-page heading use the same text, so it is
# kept in a single constant.
APP_TITLE = "Kids Story Generator"

st.set_page_config(page_title=APP_TITLE)
st.title(APP_TITLE)
st.write("Upload a picture and let's create a magical story!")
14
+
15
# Model loading
@st.cache_resource
def load_models():
    """Build the two Hugging Face pipelines used by the app.

    Returns:
        A ``(image_to_text, story_generator)`` tuple: an "image-to-text"
        pipeline backed by ``microsoft/git-base-coco`` and a
        "text-generation" pipeline backed by ``gpt2``.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the loaded pipelines instead of rebuilding them on every script rerun.
    """
    return (
        pipeline("image-to-text", model="microsoft/git-base-coco"),
        pipeline("text-generation", model="gpt2"),
    )


# Module-level handles used by the caption and story helpers.
image_to_text, story_generator = load_models()
23
+
24
# Image -> caption
def generate_caption(image):
    """Describe *image* with the module-level ``image_to_text`` pipeline.

    Args:
        image: The image object handed straight to the pipeline.

    Returns:
        The ``'generated_text'`` field of the first pipeline result. If
        anything raises, the error is shown with ``st.error`` and a fixed
        placeholder caption is returned so the rest of the flow can continue.
    """
    try:
        predictions = image_to_text(image)
        first_prediction = predictions[0]
        return first_prediction['generated_text']
    except Exception as e:
        st.error(f"Error generating caption: {e!s}")
        return "children playing in a colorful park"
 
 
 
 
 
 
32
 
33
# Caption -> short story
def _finish_story(text, max_words):
    """Cap *text* at *max_words* words and make sure it ends a sentence."""
    words = text.split()
    if len(words) > max_words:
        # Only rejoin on single spaces when we actually truncate, so shorter
        # stories keep whatever line breaks the model produced.
        text = " ".join(words[:max_words])
    text = text.rstrip()
    # Applied on every path: an untruncated story can still stop mid-sentence
    # because generation is cut off by the token budget.
    if text and not text.endswith(('.', '!', '?')):
        text += '.'
    return text


def generate_story(caption, generator=None, max_words=100):
    """Generate a short story that starts from an image caption.

    Args:
        caption: Text describing the image; inserted after
            "Once upon a time, " to form the prompt.
        generator: Optional text-generation callable with the same calling
            convention as a transformers text-generation pipeline (takes the
            prompt plus generation keyword arguments and returns a list of
            dicts with a ``'generated_text'`` key). Defaults to the
            module-level ``story_generator``.
        max_words: Maximum number of whitespace-separated words in the
            returned story. Defaults to 100.

    Returns:
        The story text, capped at ``max_words`` words and ending in ``.``,
        ``!`` or ``?``. If generation fails for any reason, the error is
        shown with ``st.error`` and a fixed fallback story built from the
        caption is returned instead.
    """
    try:
        if generator is None:
            generator = story_generator

        prompt = f"Once upon a time, {caption} "

        outputs = generator(
            prompt,
            # Token budget. In the transformers generation API, max_length
            # counts the prompt tokens as well as the newly generated ones.
            max_length=100,
            do_sample=True,
            # temperature / top_p shape how adventurous sampling is; they do
            # not change generation speed.
            temperature=0.7,
            top_p=0.9,
            num_return_sequences=1,
        )
        return _finish_story(outputs[0]['generated_text'], max_words)
    except Exception as e:
        st.error(f"Error generating story: {str(e)}")
        return f"Once upon a time, {caption}. Something magical happened and everyone lived happily ever after."
61
+
62
# Story text -> MP3 file
def text_to_speech(text):
    """Synthesize *text* to an MP3 file with gTTS and return its path.

    Each call writes to its own uniquely named temporary file. A single
    fixed filename would be shared by every session of the app, so two
    users generating stories at the same time could overwrite each other's
    audio.

    Args:
        text: The story text to speak (English).

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` if synthesis failed
        (the error is shown with ``st.error``). The file is intentionally
        not deleted here: it has to outlive this call so the caller can
        play it.
    """
    import contextlib
    import os
    import tempfile

    audio_path = None
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        # Reserve a unique path; the handle is closed before gTTS writes so
        # the file can be reopened on every platform.
        with tempfile.NamedTemporaryFile(
            prefix="story_audio_", suffix=".mp3", delete=False
        ) as handle:
            audio_path = handle.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        # Do not leave an empty or partial file behind on failure.
        if audio_path is not None:
            with contextlib.suppress(OSError):
                os.remove(audio_path)
        st.error(f"Error generating audio: {str(e)}")
        return None
72
 
73
# Upload -> caption -> story -> audio
def _show_story_for(image):
    """Run the three generation stages for *image*, rendering each result.

    A progress bar is advanced before each stage (25 / 50 / 75) and set to
    100 only when an audio file was produced; it is cleared at the end
    either way.
    """
    progress = st.progress(0)

    # Stage 1: caption the image.
    progress.progress(25)
    st.text('Analyzing image...')
    caption = generate_caption(image)
    st.write("Image caption:", caption)

    # Stage 2: turn the caption into a story.
    progress.progress(50)
    st.text('Creating story...')
    story = generate_story(caption)
    st.write(f"### Your Story ({len(story.split())} words)")
    st.write(story)

    # Stage 3: narrate the story.
    progress.progress(75)
    st.text('Generating audio...')
    audio_file = text_to_speech(story)
    if audio_file:
        progress.progress(100)
        st.write("### Listen to your story")
        st.audio(audio_file)

    progress.empty()


uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    try:
        # Show the upload first so the user sees it before generating.
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image', use_container_width=True)

        if st.button("Generate Story"):
            _show_story_for(image)
    except Exception as e:
        st.error(f"An error occurred: {e!s}")

# Footer
st.markdown("---")
st.write("Created for ISOM5240 Assignment")