CR7CAD committed on
Commit
15c1038
·
verified ·
1 Parent(s): fc13d66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -16
app.py CHANGED
@@ -6,6 +6,7 @@ import torch
6
  import os
7
  import tempfile
8
  import time
 
9
 
10
  # Use Streamlit's caching mechanisms to optimize model loading
11
  @st.cache_resource
@@ -79,11 +80,25 @@ def text2audio(story_text):
79
  # If we got here, no TTS method worked
80
  raise Exception("No text-to-speech capability available")
81
 
 
 
 
 
 
 
 
 
82
  # Simple image-to-text function using cached model
83
  @st.cache_data
84
- def img2text(image):
85
- """Convert image to text with caching"""
86
- result = img2text_model(image)
 
 
 
 
 
 
87
  return result[0]["generated_text"]
88
 
89
  # Helper function to count words
@@ -185,14 +200,8 @@ def text2story(text):
185
  # If no good ending is found, return as is
186
  return story_text
187
 
188
- # Basic Streamlit interface
189
- st.title("Image to Audio Story")
190
-
191
- # Add processing status indicator
192
- status_container = st.empty()
193
-
194
- # Initialize session state for tracking progress
195
- if 'progress' not in st.session_state:
196
  st.session_state.progress = {
197
  'caption_generated': False,
198
  'story_generated': False,
@@ -203,11 +212,14 @@ if 'progress' not in st.session_state:
203
  'audio_format': None
204
  }
205
 
206
- # File uploader
207
- uploaded_file = st.file_uploader("Upload an image", on_change=lambda: reset_progress())
208
 
209
- # Function to reset progress when a new file is uploaded
210
- def reset_progress():
 
 
 
211
  st.session_state.progress = {
212
  'caption_generated': False,
213
  'story_generated': False,
@@ -218,6 +230,9 @@ def reset_progress():
218
  'audio_format': None
219
  }
220
 
 
 
 
221
  # Process the image if uploaded
222
  if uploaded_file is not None:
223
  # Display image
@@ -226,10 +241,13 @@ if uploaded_file is not None:
226
  # Convert to PIL Image
227
  image = Image.open(uploaded_file)
228
 
 
 
 
229
  # Image to Text (if not already done)
230
  if not st.session_state.progress['caption_generated']:
231
  status_container.info("Generating caption...")
232
- st.session_state.progress['caption'] = img2text(image)
233
  st.session_state.progress['caption_generated'] = True
234
 
235
  st.write(f"Caption: {st.session_state.progress['caption']}")
 
6
  import os
7
  import tempfile
8
  import time
9
+ import numpy as np
10
 
11
  # Use Streamlit's caching mechanisms to optimize model loading
12
  @st.cache_resource
 
80
  # If we got here, no TTS method worked
81
  raise Exception("No text-to-speech capability available")
82
 
83
+ # Convert PIL Image to bytes for hashing in cache
84
def get_image_bytes(pil_img):
    """Encode a PIL image as JPEG bytes so it can be hashed for caching.

    JPEG cannot store alpha or palette data, so an image whose mode the
    JPEG writer does not accept (e.g. RGBA, LA or P, as commonly produced
    by PNG uploads) is converted to RGB first instead of letting ``save``
    raise ``OSError``. Images already in a JPEG-writable mode are encoded
    unchanged.

    Args:
        pil_img: Image object exposing ``mode``, ``convert`` and ``save``.

    Returns:
        The JPEG-encoded image as ``bytes``.
    """
    import io

    # Modes the JPEG encoder can write directly; anything else needs RGB.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if pil_img.mode not in jpeg_modes:
        pil_img = pil_img.convert("RGB")

    buf = io.BytesIO()
    pil_img.save(buf, format='JPEG')
    return buf.getvalue()
90
+
91
  # Simple image-to-text function using cached model
92
  @st.cache_data
93
def img2text(image_bytes):
    """Return the caption generated for an encoded image.

    The image is passed in as raw bytes (rather than a PIL image) for
    caching compatibility; it is decoded back into a PIL image here
    before being handed to the captioning model.
    """
    import io
    from PIL import Image

    # Rebuild the PIL image from its encoded bytes.
    decoded = Image.open(io.BytesIO(image_bytes))

    # Run the model and pull the text out of the first result.
    captions = img2text_model(decoded)
    first = captions[0]
    return first["generated_text"]
103
 
104
  # Helper function to count words
 
200
  # If no good ending is found, return as is
201
  return story_text
202
 
203
+ # Function to reset progress when a new file is uploaded
204
+ def reset_progress():
 
 
 
 
 
 
205
  st.session_state.progress = {
206
  'caption_generated': False,
207
  'story_generated': False,
 
212
  'audio_format': None
213
  }
214
 
215
+ # Basic Streamlit interface
216
+ st.title("Image to Audio Story")
217
 
218
+ # Add processing status indicator
219
+ status_container = st.empty()
220
+
221
+ # Initialize session state for tracking progress
222
+ if 'progress' not in st.session_state:
223
  st.session_state.progress = {
224
  'caption_generated': False,
225
  'story_generated': False,
 
230
  'audio_format': None
231
  }
232
 
233
+ # File uploader
234
+ uploaded_file = st.file_uploader("Upload an image", on_change=reset_progress)
235
+
236
  # Process the image if uploaded
237
  if uploaded_file is not None:
238
  # Display image
 
241
  # Convert to PIL Image
242
  image = Image.open(uploaded_file)
243
 
244
+ # Convert image to bytes for caching compatibility
245
+ image_bytes = get_image_bytes(image)
246
+
247
  # Image to Text (if not already done)
248
  if not st.session_state.progress['caption_generated']:
249
  status_container.info("Generating caption...")
250
+ st.session_state.progress['caption'] = img2text(image_bytes)
251
  st.session_state.progress['caption_generated'] = True
252
 
253
  st.write(f"Caption: {st.session_state.progress['caption']}")